|             Line data    Source code 
       1              : 
       2              : namespace ippl {
       3              : 
       4              :     // LagrangeSpace constructor, which calls the FiniteElementSpace constructor,
       5              :     // and decomposes the elements among ranks according to layout.
       6              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
       7              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
       8          408 :     LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS, FieldRHS>::LagrangeSpace(
       9              :         UniformCartesian<T, Dim>& mesh, ElementType& ref_element, const QuadratureType& quadrature,
      10              :         const Layout_t& layout)
      11              :         : FiniteElementSpace<T, Dim, getLagrangeNumElementDOFs(Dim, Order), ElementType,
      12          408 :                              QuadratureType, FieldLHS, FieldRHS>(mesh, ref_element, quadrature) {
      13              :         // Assert that the dimension is either 1, 2 or 3.
      14              :         static_assert(Dim >= 1 && Dim <= 3,
      15              :                       "Finite Element space only supports 1D, 2D and 3D meshes");
      16              : 
      17              :         // Initialize the elementIndices view
      18          408 :         initializeElementIndices(layout);
      19          408 :     }
      20              : 
      21              :     // LagrangeSpace constructor, which calls the FiniteElementSpace constructor.
      22              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
      23              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
      24           12 :     LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS, FieldRHS>::LagrangeSpace(
      25              :         UniformCartesian<T, Dim>& mesh, ElementType& ref_element, const QuadratureType& quadrature)
      26              :         : FiniteElementSpace<T, Dim, getLagrangeNumElementDOFs(Dim, Order), ElementType,
      27           12 :                              QuadratureType, FieldLHS, FieldRHS>(mesh, ref_element, quadrature) {
      28              :         // Assert that the dimension is either 1, 2 or 3.
      29              :         static_assert(Dim >= 1 && Dim <= 3,
      30              :                       "Finite Element space only supports 1D, 2D and 3D meshes");
      31           12 :     }
      32              : 
      33              :     // LagrangeSpace initializer, to be made available to the FEMPoissonSolver 
      34              :     // such that we can call it from setRhs.
      35              :     // Sets the correct mesh ad decomposes the elements among ranks according to layout.
      36              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
      37              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
      38            0 :     void LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS, FieldRHS>::initialize(
      39              :         UniformCartesian<T, Dim>& mesh, const Layout_t& layout)
      40              :     {
      41              :         FiniteElementSpace<T, Dim, getLagrangeNumElementDOFs(Dim, Order), ElementType,
      42            0 :                            QuadratureType, FieldLHS, FieldRHS>::setMesh(mesh);
      43              : 
      44              :         // Initialize the elementIndices view
      45            0 :         initializeElementIndices(layout);
      46            0 :     }
      47              : 
      48              :     // Initialize element indices Kokkos View by distributing elements among MPI ranks.
      49              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
      50              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
      51          408 :     void LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
      52              :                        FieldRHS>::initializeElementIndices(const Layout_t& layout) {
      53          408 :         const auto& ldom = layout.getLocalNDIndex();
      54          408 :         int npoints      = ldom.size();
      55          408 :         auto first       = ldom.first();
      56          408 :         auto last        = ldom.last();
      57          408 :         ippl::Vector<double, Dim> bounds;
      58              : 
      59         1260 :         for (size_t d = 0; d < Dim; ++d) {
      60          852 :             bounds[d] = this->nr_m[d] - 1;
      61              :         }
      62              : 
      63          408 :         int upperBoundaryPoints = -1;
      64              : 
      65              :         // We iterate over the local domain points, getting the corresponding elements, 
      66              :         // while tagging upper boundary points such that they can be removed after.
      67          408 :         Kokkos::View<size_t*> points("npoints", npoints);
      68          408 :         Kokkos::parallel_reduce(
      69            0 :             "ComputePoints", npoints,
      70        10758 :             KOKKOS_CLASS_LAMBDA(const int i, int& local) {
      71         9942 :                 int idx = i;
      72         9942 :                 indices_t val;
      73         9942 :                 bool isBoundary = false;
      74        37710 :                 for (unsigned int d = 0; d < Dim; ++d) {
      75        27768 :                     int range = last[d] - first[d] + 1;
      76        27768 :                     val[d]    = first[d] + (idx % range);
      77        27768 :                     idx /= range;
      78        27768 :                     if (val[d] == bounds[d]) {
      79         5358 :                         isBoundary = true;
      80              :                     }
      81              :                 }
      82         9942 :                 points(i) = (!isBoundary) * (this->getElementIndex(val));
      83         9942 :                 local += isBoundary;
      84         9942 :             },
      85          816 :             Kokkos::Sum<int>(upperBoundaryPoints));
      86          408 :         Kokkos::fence();
      87              : 
      88              :         // The elementIndices will be the same array as computed above,
      89              :         // with the tagged upper boundary points removed.
      90          408 :         int elementsPerRank = npoints - upperBoundaryPoints;
      91          408 :         elementIndices      = Kokkos::View<size_t*>("i", elementsPerRank);
      92          408 :         Kokkos::View<size_t> index("index");
      93              : 
      94          408 :         Kokkos::parallel_for(
      95        20700 :             "RemoveNaNs", npoints, KOKKOS_CLASS_LAMBDA(const int i) {
      96        19884 :                 if ((points(i) != 0) || (i == 0)) {
      97        11124 :                     const size_t idx    = Kokkos::atomic_fetch_add(&index(), 1);
      98        16686 :                     elementIndices(idx) = points(i);
      99              :                 }
     100              :             });
     101          408 :     }
     102              : 
     103              :     ///////////////////////////////////////////////////////////////////////
     104              :     /// Degree of Freedom operations //////////////////////////////////////
     105              :     ///////////////////////////////////////////////////////////////////////
     106              : 
     107              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     108              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     109           12 :     KOKKOS_FUNCTION size_t LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     110              :                                          FieldRHS>::numGlobalDOFs() const {
     111           12 :         size_t num_global_dofs = 1;
     112           36 :         for (size_t d = 0; d < Dim; ++d) {
     113           24 :             num_global_dofs *= this->nr_m[d] * Order;
     114              :         }
     115              : 
     116           12 :         return num_global_dofs;
     117              :     }
     118              : 
     119              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     120              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     121              :     KOKKOS_FUNCTION
     122       113032 :     size_t LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS, FieldRHS>::getLocalDOFIndex
     123              :     (const size_t& elementIndex, const size_t& globalDOFIndex) const {
     124              :         static_assert(Dim == 1 || Dim == 2 || Dim == 3, "Dim must be 1, 2 or 3");
     125              :         // TODO fix not order independent, only works for order 1
     126              :         static_assert(Order == 1, "Only order 1 is supported at the moment");
     127              : 
     128              :         // Get all the global DOFs for the element
     129       113032 :         const Vector<size_t, numElementDOFs> global_dofs =
     130       113032 :             this->getGlobalDOFIndices(elementIndex);
     131              : 
     132              :         // Find the global DOF in the vector and return the local DOF index
     133              :         // Note: It is important that this only works because the global_dofs 
     134              :         // are already arranged in the correct order from getGlobalDOFIndices
     135       399232 :         for (size_t i = 0; i < global_dofs.dim; ++i) {
     136       398536 :             if (global_dofs[i] == globalDOFIndex) {
     137       112336 :                 return i;
     138              :             }
     139              :         }
     140          696 :         return std::numeric_limits<size_t>::quiet_NaN();
     141              :         //throw IpplException("LagrangeSpace::getLocalDOFIndex()",
     142              :         //                    "FEM Lagrange Space: Global DOF not found in specified element");
     143       113032 :     }
     144              : 
     145              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     146              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     147              :     KOKKOS_FUNCTION size_t
     148          336 :     LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     149              :                   FieldRHS>::getGlobalDOFIndex(const size_t& elementIndex,
     150              :                                                const size_t& localDOFIndex) const {
     151          336 :         const auto global_dofs = this->getGlobalDOFIndices(elementIndex);
     152              : 
     153          672 :         return global_dofs[localDOFIndex];
     154          336 :     }
     155              : 
     156              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     157              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     158              :     KOKKOS_FUNCTION Vector<size_t, LagrangeSpace<T, Dim, Order, ElementType, QuadratureType,
     159              :                                                  FieldLHS, FieldRHS>::numElementDOFs>
     160          452 :     LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     161              :                   FieldRHS>::getLocalDOFIndices() const {
     162          452 :         Vector<size_t, numElementDOFs> localDOFs;
     163              : 
     164         3756 :         for (size_t dof = 0; dof < numElementDOFs; ++dof) {
     165         3304 :             localDOFs[dof] = dof;
     166              :         }
     167              : 
     168          452 :         return localDOFs;
     169              :     }
     170              : 
     171              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     172              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     173              :     KOKKOS_FUNCTION Vector<size_t, LagrangeSpace<T, Dim, Order, ElementType, QuadratureType,
     174              :                                                  FieldLHS, FieldRHS>::numElementDOFs>
     175       137820 :     LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     176              :                   FieldRHS>::getGlobalDOFIndices(const size_t& elementIndex) const {
     177       137820 :         Vector<size_t, numElementDOFs> globalDOFs(0);
     178              : 
     179              :         // get element pos
     180       137820 :         indices_t elementPos = this->getElementNDIndex(elementIndex);
     181              : 
     182              :         // Compute the vector to multiply the ndindex with
     183       137820 :         ippl::Vector<size_t, Dim> vec(1);
     184       325080 :         for (size_t d = 1; d < dim; ++d) {
     185       448028 :             for (size_t d2 = d; d2 < Dim; ++d2) {
     186       260768 :                 vec[d2] *= this->nr_m[d - 1];
     187              :             }
     188              :         }
     189       137820 :         vec *= Order;  // Multiply each dimension by the order
     190       137820 :         size_t smallestGlobalDOF = elementPos.dot(vec);
     191              : 
     192              :         // Add the vertex DOFs
     193       137820 :         globalDOFs[0] = smallestGlobalDOF;
     194       137820 :         globalDOFs[1] = smallestGlobalDOF + Order;
     195              : 
     196              :         if (Dim >= 2) {
     197       113752 :             globalDOFs[2] = globalDOFs[1] + this->nr_m[0] * Order;
     198       113752 :             globalDOFs[3] = globalDOFs[0] + this->nr_m[0] * Order;
     199              :         }
     200              :         if (Dim >= 3) {
     201        73508 :             globalDOFs[4] = globalDOFs[0] + this->nr_m[1] * this->nr_m[0] * Order;
     202        73508 :             globalDOFs[5] = globalDOFs[1] + this->nr_m[1] * this->nr_m[0] * Order;
     203        73508 :             globalDOFs[6] = globalDOFs[2] + this->nr_m[1] * this->nr_m[0] * Order;
     204        73508 :             globalDOFs[7] = globalDOFs[3] + this->nr_m[1] * this->nr_m[0] * Order;
     205              :         }
     206              : 
     207              :         if (Order > 1) {
     208              :             // If the order is greater than 1, there are edge and face DOFs, otherwise the work is
     209              :             // done
     210              : 
     211              :             // Add the edge DOFs
     212              :             if (Dim >= 2) {
     213              :                 for (size_t i = 0; i < Order - 1; ++i) {
     214              :                     globalDOFs[8 + i]                   = globalDOFs[0] + i + 1;
     215              :                     globalDOFs[8 + Order - 1 + i]       = globalDOFs[1] + (i + 1) * this->nr_m[1];
     216              :                     globalDOFs[8 + 2 * (Order - 1) + i] = globalDOFs[2] - (i + 1);
     217              :                     globalDOFs[8 + 3 * (Order - 1) + i] = globalDOFs[3] - (i + 1) * this->nr_m[1];
     218              :                 }
     219              :             }
     220              :             if (Dim >= 3) {
     221              :                 // TODO
     222              :             }
     223              : 
     224              :             // Add the face DOFs
     225              :             if (Dim >= 2) {
     226              :                 for (size_t i = 0; i < Order - 1; ++i) {
     227              :                     for (size_t j = 0; j < Order - 1; ++j) {
     228              :                         // TODO CHECK
     229              :                         globalDOFs[8 + 4 * (Order - 1) + i * (Order - 1) + j] =
     230              :                             globalDOFs[0] + (i + 1) + (j + 1) * this->nr_m[1];
     231              :                         globalDOFs[8 + 4 * (Order - 1) + (Order - 1) * (Order - 1) + i * (Order - 1)
     232              :                                    + j] = globalDOFs[1] + (i + 1) + (j + 1) * this->nr_m[1];
     233              :                         globalDOFs[8 + 4 * (Order - 1) + 2 * (Order - 1) * (Order - 1)
     234              :                                    + i * (Order - 1) + j] =
     235              :                             globalDOFs[2] - (i + 1) + (j + 1) * this->nr_m[1];
     236              :                         globalDOFs[8 + 4 * (Order - 1) + 3 * (Order - 1) * (Order - 1)
     237              :                                    + i * (Order - 1) + j] =
     238              :                             globalDOFs[3] - (i + 1) + (j + 1) * this->nr_m[1];
     239              :                     }
     240              :                 }
     241              :             }
     242              :         }
     243              : 
     244       137820 :         return globalDOFs;
     245       137820 :     }
     246              : 
     247              :     ///////////////////////////////////////////////////////////////////////
     248              :     /// Basis functions and gradients /////////////////////////////////////
     249              :     ///////////////////////////////////////////////////////////////////////
     250              : 
     251              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     252              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     253              :     KOKKOS_FUNCTION T
     254       379040 :     LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS, FieldRHS>::
     255              :         evaluateRefElementShapeFunction(
     256              :             const size_t& localDOF,
     257              :             const LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     258              :                                 FieldRHS>::point_t& localPoint) const {
     259              :         static_assert(Order == 1, "Only order 1 is supported at the moment");
     260              :         // Assert that the local vertex index is valid.
     261       379040 :         assert(localDOF < numElementDOFs
     262              :                && "The local vertex index is invalid");  // TODO assumes 1st order Lagrange
     263              : 
     264       379040 :         assert(this->ref_element_m.isPointInRefElement(localPoint)
     265              :                && "Point is not in reference element");
     266              : 
     267              :         // Get the local vertex indices for the local vertex index.
     268              :         // TODO fix not order independent, only works for order 1
     269       379040 :         const point_t ref_element_point = this->ref_element_m.getLocalVertices()[localDOF];
     270              : 
     271              :         // The variable that accumulates the product of the shape functions.
     272       379040 :         T product = 1;
     273              : 
     274      1444944 :         for (size_t d = 0; d < Dim; d++) {
     275      1065904 :             if (localPoint[d] < ref_element_point[d]) {
     276       532952 :                 product *= localPoint[d];
     277              :             } else {
     278       532952 :                 product *= 1.0 - localPoint[d];
     279              :             }
     280              :         }
     281              : 
     282       379040 :         return product;
     283       379040 :     }
     284              : 
     285              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     286              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     287              :     KOKKOS_FUNCTION typename LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     288              :                                            FieldRHS>::point_t
     289       262600 :     LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS, FieldRHS>::
     290              :         evaluateRefElementShapeFunctionGradient(
     291              :             const size_t& localDOF,
     292              :             const LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     293              :                                 FieldRHS>::point_t& localPoint) const {
     294              :         // TODO fix not order independent, only works for order 1
     295              :         static_assert(Order == 1 && "Only order 1 is supported at the moment");
     296              : 
     297              :         // Assert that the local vertex index is valid.
     298       262600 :         assert(localDOF < numElementDOFs && "The local vertex index is invalid");
     299              : 
     300       262600 :         assert(this->ref_element_m.isPointInRefElement(localPoint)
     301              :                && "Point is not in reference element");
     302              : 
     303              :         // Get the local dof nd_index
     304       262600 :         const vertex_points_t local_vertex_points = this->ref_element_m.getLocalVertices();
     305              : 
     306       262600 :         const point_t& local_vertex_point = local_vertex_points[localDOF];
     307              : 
     308       262600 :         point_t gradient(1);
     309              : 
     310              :         // To construct the gradient we need to loop over the dimensions and multiply the
     311              :         // shape functions in each dimension except the current one. The one of the current
     312              :         // dimension is replaced by the derivative of the shape function in that dimension,
     313              :         // which is either 1 or -1.
     314      1043664 :         for (size_t d = 0; d < Dim; d++) {
     315              :             // The variable that accumulates the product of the shape functions.
     316       781064 :             T product = 1;
     317              : 
     318      3111120 :             for (size_t d2 = 0; d2 < Dim; d2++) {
     319      2330056 :                 if (d2 == d) {
     320       781064 :                     if (localPoint[d] < local_vertex_point[d]) {
     321       390532 :                         product *= 1;
     322              :                     } else {
     323       390532 :                         product *= -1;
     324              :                     }
     325              :                 } else {
     326      1548992 :                     if (localPoint[d2] < local_vertex_point[d2]) {
     327       774496 :                         product *= localPoint[d2];
     328              :                     } else {
     329       774496 :                         product *= 1.0 - localPoint[d2];
     330              :                     }
     331              :                 }
     332              :             }
     333              : 
     334       781064 :             gradient[d] = product;
     335              :         }
     336              : 
     337       262600 :         return gradient;
     338       262600 :     }
     339              : 
     340              :     ///////////////////////////////////////////////////////////////////////
     341              :     /// Assembly operations ///////////////////////////////////////////////
     342              :     ///////////////////////////////////////////////////////////////////////
     343              : 
     344              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     345              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     346              :     template <typename F>
     347            8 :     FieldLHS LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     348              :                            FieldRHS>::evaluateAx(FieldLHS& field, F& evalFunction) const {
     349            8 :         Inform m("");
     350              : 
     351              :         // start a timer
     352            8 :         static IpplTimings::TimerRef evalAx = IpplTimings::getTimer("evaluateAx");
     353            8 :         IpplTimings::startTimer(evalAx);
     354              : 
     355              :         // get number of ghost cells in field
     356            8 :         const int nghost = field.getNghost();
     357              : 
     358              :         // create a new field for result with view initialized to zero (views are initialized to
     359              :         // zero by default)
     360            8 :         FieldLHS resultField(field.get_mesh(), field.getLayout(), nghost);
     361              : 
     362              :         // List of quadrature weights
     363            8 :         const Vector<T, QuadratureType::numElementNodes> w =
     364            8 :             this->quadrature_m.getWeightsForRefElement();
     365              : 
     366              :         // List of quadrature nodes
     367            8 :         const Vector<point_t, QuadratureType::numElementNodes> q =
     368            8 :             this->quadrature_m.getIntegrationNodesForRefElement();
     369              : 
     370              :         // TODO move outside of evaluateAx (I think it is possible for other problems as well)
     371              :         // Gradients of the basis functions for the DOF at the quadrature nodes
     372            8 :         Vector<Vector<point_t, numElementDOFs>, QuadratureType::numElementNodes> grad_b_q;
     373           16 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     374           48 :             for (size_t i = 0; i < numElementDOFs; ++i) {
     375           40 :                 grad_b_q[k][i] = this->evaluateRefElementShapeFunctionGradient(i, q[k]);
     376              :             }
     377              :         }
     378              : 
     379              :         // Make local element matrix -- does not change through the element mesh
     380              :         // Element matrix
     381            8 :         Vector<Vector<T, numElementDOFs>, numElementDOFs> A_K;
     382              : 
     383              :         // 1. Compute the Galerkin element matrix A_K
     384           48 :         for (size_t i = 0; i < numElementDOFs; ++i) {
     385          312 :             for (size_t j = 0; j < numElementDOFs; ++j) {
     386          272 :                 A_K[i][j] = 0.0;
     387          544 :                 for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     388          272 :                     A_K[i][j] += w[k] * evalFunction(i, j, grad_b_q[k]);
     389              :                 }
     390              :             }
     391              :         }
     392              : 
     393              :         // Get field data and atomic result data,
     394              :         // since it will be added to during the kokkos loop
     395            8 :         ViewType view             = field.getView();
     396            8 :         AtomicViewType resultView = resultField.getView();
     397              : 
     398              :         // Get boundary conditions from field
     399            8 :         BConds<FieldLHS, Dim>& bcField = field.getFieldBC();
     400            8 :         FieldBC bcType = bcField[0]->getBCType();
     401              : 
     402              :         // Get domain information
     403            8 :         auto ldom = (field.getLayout()).getLocalNDIndex();
     404              : 
     405              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
     406              :         using policy_type = Kokkos::RangePolicy<exec_space>;
     407              : 
     408              :         // start a timer
     409            8 :         static IpplTimings::TimerRef outer_loop = IpplTimings::getTimer("evaluateAx: outer loop");
     410            8 :         IpplTimings::startTimer(outer_loop);
     411              : 
     412              :         // Loop over elements to compute contributions
     413            8 :         Kokkos::parallel_for(
     414           16 :             "Loop over elements", policy_type(0, elementIndices.extent(0)),
     415          152 :             KOKKOS_CLASS_LAMBDA(const size_t index) {
     416            0 :                 const size_t elementIndex                            = elementIndices(index);
     417          136 :                 const Vector<size_t, numElementDOFs> local_dof = this->getLocalDOFIndices();
     418            0 :                 const Vector<size_t, numElementDOFs> global_dofs =
     419          136 :                     this->getGlobalDOFIndices(elementIndex);
     420          136 :                 Vector<indices_t, numElementDOFs> global_dof_ndindices;
     421              : 
     422         1176 :                 for (size_t i = 0; i < numElementDOFs; ++i) {
     423         1040 :                     global_dof_ndindices[i] = this->getMeshVertexNDIndex(global_dofs[i]);
     424              :                 }
     425              : 
     426              :                 // local DOF indices (both i and j go from 0 to numDOFs-1 in the element)
     427              :                 size_t i, j;
     428              : 
     429              :                 // global DOF n-dimensional indices (Vector of N indices representing indices in
     430              :                 // each dimension)
     431          136 :                 indices_t I_nd, J_nd;
     432              : 
     433              :                 // 2. Compute the contribution to resultAx = A*x with A_K
     434         1176 :                 for (i = 0; i < numElementDOFs; ++i) {
     435            0 :                     I_nd = global_dof_ndindices[i];
     436              : 
     437              :                     // Handle boundary DOFs
     438              :                     // If Zero Dirichlet BCs, skip this DOF
     439              :                     // If Constant Dirichlet BCs, identity
     440         1040 :                     if ((bcType == CONSTANT_FACE) && (this->isDOFOnBoundary(I_nd))) {
     441            0 :                         for (unsigned d = 0; d < Dim; ++d) {
     442            0 :                             I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
     443              :                         }
     444            0 :                         apply(resultView, I_nd) =  apply(view, I_nd);
     445            0 :                         continue;
     446         1040 :                     } else if ((bcType == ZERO_FACE) && (this->isDOFOnBoundary(I_nd))) {
     447            0 :                         continue;
     448              :                     }
     449              : 
     450              :                     // get the appropriate index for the Kokkos view of the field
     451         1752 :                     for (unsigned d = 0; d < Dim; ++d) {
     452            0 :                         I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
     453              :                     }
     454              : 
     455         3924 :                     for (j = 0; j < numElementDOFs; ++j) {
     456            0 :                         J_nd = global_dof_ndindices[j];
     457              : 
     458              :                         // Skip boundary DOFs (Zero & Constant Dirichlet BCs)
     459            0 :                         if (((bcType == ZERO_FACE) || (bcType == CONSTANT_FACE)) 
     460         3480 :                             && this->isDOFOnBoundary(J_nd)) {
     461            0 :                             continue;
     462              :                         }
     463              : 
     464              :                         // get the appropriate index for the Kokkos view of the field
     465         8040 :                         for (unsigned d = 0; d < Dim; ++d) {
     466            0 :                             J_nd[d] = J_nd[d] - ldom[d].first() + nghost;
     467              :                         }
     468              : 
     469         2020 :                         apply(resultView, I_nd) += A_K[i][j] * apply(view, J_nd);
     470              :                     }
     471              :                 }
     472            0 :             });
     473            8 :         IpplTimings::stopTimer(outer_loop);
     474              : 
     475            8 :         if (bcType == PERIODIC_FACE) {
     476            0 :             resultField.accumulateHalo();
     477            0 :             bcField.apply(resultField);
     478            0 :             bcField.assignGhostToPhysical(resultField);
     479              :         } else {
     480            8 :             resultField.accumulateHalo_noghost();
     481              :         }
     482              : 
     483            8 :         IpplTimings::stopTimer(evalAx);
     484              : 
     485            8 :         return resultField;
     486            8 :     }
     487              : 
     488              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     489              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     490              :     template <typename F>
     491            0 :     FieldLHS LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     492              :                            FieldRHS>::evaluateAx_lower(FieldLHS& field, F& evalFunction) const {
     493            0 :         Inform m("");
     494              : 
     495              :         // start a timer
     496            0 :         static IpplTimings::TimerRef evalAx = IpplTimings::getTimer("evaluateAx");
     497            0 :         IpplTimings::startTimer(evalAx);
     498              : 
     499              :         // get number of ghost cells in field
     500            0 :         const int nghost = field.getNghost();
     501              : 
     502              :         // create a new field for result with view initialized to zero (views are initialized to
     503              :         // zero by default)
     504            0 :         FieldLHS resultField(field.get_mesh(), field.getLayout(), nghost);
     505              : 
     506              :         // List of quadrature weights
     507            0 :         const Vector<T, QuadratureType::numElementNodes> w =
     508            0 :             this->quadrature_m.getWeightsForRefElement();
     509              : 
     510              :         // List of quadrature nodes
     511            0 :         const Vector<point_t, QuadratureType::numElementNodes> q =
     512            0 :             this->quadrature_m.getIntegrationNodesForRefElement();
     513              : 
     514              :         // TODO move outside of evaluateAx (I think it is possible for other problems as well)
     515              :         // Gradients of the basis functions for the DOF at the quadrature nodes
     516            0 :         Vector<Vector<point_t, numElementDOFs>, QuadratureType::numElementNodes> grad_b_q;
     517            0 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     518            0 :             for (size_t i = 0; i < numElementDOFs; ++i) {
     519            0 :                 grad_b_q[k][i] = this->evaluateRefElementShapeFunctionGradient(i, q[k]);
     520              :             }
     521              :         }
     522              : 
     523              :         // Make local element matrix -- does not change through the element mesh
     524              :         // Element matrix
     525            0 :         Vector<Vector<T, numElementDOFs>, numElementDOFs> A_K;
     526              : 
     527              :         // 1. Compute the Galerkin element matrix A_K
     528            0 :         for (size_t i = 0; i < numElementDOFs; ++i) {
     529            0 :             for (size_t j = 0; j < numElementDOFs; ++j) {
     530            0 :                 A_K[i][j] = 0.0;
     531            0 :                 for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     532            0 :                     A_K[i][j] += w[k] * evalFunction(i, j, grad_b_q[k]);
     533              :                 }
     534              :             }
     535              :         }
     536              : 
     537              :         // Get field data and atomic result data,
     538              :         // since it will be added to during the kokkos loop
     539            0 :         ViewType view             = field.getView();
     540            0 :         AtomicViewType resultView = resultField.getView();
     541              : 
     542              :         // Get boundary conditions from field
     543            0 :         BConds<FieldLHS, Dim>& bcField = field.getFieldBC();
     544            0 :         FieldBC bcType = bcField[0]->getBCType();
     545              : 
     546              :         // Get domain information
     547            0 :         auto ldom = (field.getLayout()).getLocalNDIndex();
     548              : 
     549              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
     550              :         using policy_type = Kokkos::RangePolicy<exec_space>;
     551              : 
     552              :         // start a timer
     553            0 :         static IpplTimings::TimerRef outer_loop = IpplTimings::getTimer("evaluateAx: outer loop");
     554            0 :         IpplTimings::startTimer(outer_loop);
     555              : 
     556              :         // Loop over elements to compute contributions
     557            0 :         Kokkos::parallel_for(
     558            0 :             "Loop over elements", policy_type(0, elementIndices.extent(0)),
     559            0 :             KOKKOS_CLASS_LAMBDA(const size_t index) {
     560            0 :                 const size_t elementIndex                            = elementIndices(index);
     561            0 :                 const Vector<size_t, numElementDOFs> local_dof = this->getLocalDOFIndices();
     562            0 :                 const Vector<size_t, numElementDOFs> global_dofs =
     563            0 :                     this->getGlobalDOFIndices(elementIndex);
     564            0 :                 Vector<indices_t, numElementDOFs> global_dof_ndindices;
     565              : 
     566            0 :                 for (size_t i = 0; i < numElementDOFs; ++i) {
     567            0 :                     global_dof_ndindices[i] = this->getMeshVertexNDIndex(global_dofs[i]);
     568              :                 }
     569              : 
     570              :                 // local DOF indices
     571              :                 size_t i, j;
     572              : 
     573              :                 // global DOF n-dimensional indices (Vector of N indices representing indices in
     574              :                 // each dimension)
     575            0 :                 indices_t I_nd, J_nd;
     576              : 
     577              :                 // 2. Compute the contribution to resultAx = A*x with A_K
     578            0 :                 for (i = 0; i < numElementDOFs; ++i) {
     579            0 :                     I_nd = global_dof_ndindices[i];
     580              : 
     581              :                     // Handle boundary DOFs
     582              :                     // If Zero Dirichlet BCs, skip this DOF
     583              :                     // If Constant Dirichlet BCs, identity
     584            0 :                     if ((bcType == CONSTANT_FACE) && (this->isDOFOnBoundary(I_nd))) {
     585            0 :                         for (unsigned d = 0; d < Dim; ++d) {
     586            0 :                             I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
     587              :                         }
     588            0 :                         apply(resultView, I_nd) =  apply(view, I_nd);
     589            0 :                         continue;
     590            0 :                     } else if ((bcType == ZERO_FACE) && (this->isDOFOnBoundary(I_nd))) {
     591            0 :                         continue;
     592              :                     }
     593              : 
     594              :                     // get the appropriate index for the Kokkos view of the field
     595            0 :                     for (unsigned d = 0; d < Dim; ++d) {
     596            0 :                         I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
     597              :                     }
     598              : 
     599            0 :                     for (j = 0; j < numElementDOFs; ++j) {
     600            0 :                         J_nd = global_dof_ndindices[j];
     601              : 
     602            0 :                         if (global_dofs[i] >= global_dofs[j]) {
     603            0 :                             continue;
     604              :                         }
     605              : 
     606              :                         // Skip boundary DOFs (Zero & Constant Dirichlet BCs)
     607            0 :                         if (((bcType == ZERO_FACE) || (bcType == CONSTANT_FACE)) 
     608            0 :                             && this->isDOFOnBoundary(J_nd)) {
     609            0 :                             continue;
     610              :                         }
     611              : 
     612              :                         // get the appropriate index for the Kokkos view of the field
     613            0 :                         for (unsigned d = 0; d < Dim; ++d) {
     614            0 :                             J_nd[d] = J_nd[d] - ldom[d].first() + nghost;
     615              :                         }
     616              : 
     617            0 :                         apply(resultView, I_nd) += A_K[i][j] * apply(view, J_nd);
     618              :                     }
     619              :                 }
     620            0 :             });
     621            0 :         IpplTimings::stopTimer(outer_loop);
     622              : 
     623            0 :         if (bcType == PERIODIC_FACE) {
     624            0 :             resultField.accumulateHalo();
     625            0 :             bcField.apply(resultField);
     626            0 :             bcField.assignGhostToPhysical(resultField);
     627              :         } else {
     628            0 :             resultField.accumulateHalo_noghost();
     629              :         }
     630              : 
     631            0 :         IpplTimings::stopTimer(evalAx);
     632              : 
     633            0 :         return resultField;
     634            0 :     }
     635              : 
     636              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     637              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     638              :     template <typename F>
     639            0 :     FieldLHS LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     640              :                            FieldRHS>::evaluateAx_upper(FieldLHS& field, F& evalFunction) const {
     641            0 :         Inform m("");
     642              : 
     643              :         // start a timer
     644            0 :         static IpplTimings::TimerRef evalAx = IpplTimings::getTimer("evaluateAx");
     645            0 :         IpplTimings::startTimer(evalAx);
     646              : 
     647              :         // get number of ghost cells in field
     648            0 :         const int nghost = field.getNghost();
     649              : 
     650              :         // create a new field for result with view initialized to zero (views are initialized to
     651              :         // zero by default)
     652            0 :         FieldLHS resultField(field.get_mesh(), field.getLayout(), nghost);
     653              : 
     654              :         // List of quadrature weights
     655            0 :         const Vector<T, QuadratureType::numElementNodes> w =
     656            0 :             this->quadrature_m.getWeightsForRefElement();
     657              : 
     658              :         // List of quadrature nodes
     659            0 :         const Vector<point_t, QuadratureType::numElementNodes> q =
     660            0 :             this->quadrature_m.getIntegrationNodesForRefElement();
     661              : 
     662              :         // TODO move outside of evaluateAx (I think it is possible for other problems as well)
     663              :         // Gradients of the basis functions for the DOF at the quadrature nodes
     664            0 :         Vector<Vector<point_t, numElementDOFs>, QuadratureType::numElementNodes> grad_b_q;
     665            0 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     666            0 :             for (size_t i = 0; i < numElementDOFs; ++i) {
     667            0 :                 grad_b_q[k][i] = this->evaluateRefElementShapeFunctionGradient(i, q[k]);
     668              :             }
     669              :         }
     670              : 
     671              :         // Make local element matrix -- does not change through the element mesh
     672              :         // Element matrix
     673            0 :         Vector<Vector<T, numElementDOFs>, numElementDOFs> A_K;
     674              : 
     675              :         // 1. Compute the Galerkin element matrix A_K
     676            0 :         for (size_t i = 0; i < numElementDOFs; ++i) {
     677            0 :             for (size_t j = 0; j < numElementDOFs; ++j) {
     678            0 :                 A_K[i][j] = 0.0;
     679            0 :                 for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     680            0 :                     A_K[i][j] += w[k] * evalFunction(i, j, grad_b_q[k]);
     681              :                 }
     682              :             }
     683              :         }
     684              : 
     685              :         // Get field data and atomic result data,
     686              :         // since it will be added to during the kokkos loop
     687            0 :         ViewType view             = field.getView();
     688            0 :         AtomicViewType resultView = resultField.getView();
     689              : 
     690              :         // Get boundary conditions from field
     691            0 :         BConds<FieldLHS, Dim>& bcField = field.getFieldBC();
     692            0 :         FieldBC bcType = bcField[0]->getBCType();
     693              : 
     694              :         // Get domain information
     695            0 :         auto ldom = (field.getLayout()).getLocalNDIndex();
     696              : 
     697              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
     698              :         using policy_type = Kokkos::RangePolicy<exec_space>;
     699              : 
     700              :         // start a timer
     701            0 :         static IpplTimings::TimerRef outer_loop = IpplTimings::getTimer("evaluateAx: outer loop");
     702            0 :         IpplTimings::startTimer(outer_loop);
     703              : 
     704              :         // Loop over elements to compute contributions
     705            0 :         Kokkos::parallel_for(
     706            0 :             "Loop over elements", policy_type(0, elementIndices.extent(0)),
     707            0 :             KOKKOS_CLASS_LAMBDA(const size_t index) {
     708            0 :                 const size_t elementIndex                            = elementIndices(index);
     709            0 :                 const Vector<size_t, numElementDOFs> local_dof = this->getLocalDOFIndices();
     710            0 :                 const Vector<size_t, numElementDOFs> global_dofs =
     711            0 :                     this->getGlobalDOFIndices(elementIndex);
     712            0 :                 Vector<indices_t, numElementDOFs> global_dof_ndindices;
     713              : 
     714            0 :                 for (size_t i = 0; i < numElementDOFs; ++i) {
     715            0 :                     global_dof_ndindices[i] = this->getMeshVertexNDIndex(global_dofs[i]);
     716              :                 }
     717              : 
     718              :                 // local DOF indices
     719              :                 size_t i, j;
     720              : 
     721              :                 // global DOF n-dimensional indices (Vector of N indices representing indices in
     722              :                 // each dimension)
     723            0 :                 indices_t I_nd, J_nd;
     724              : 
     725              :                 // 2. Compute the contribution to resultAx = A*x with A_K
     726            0 :                 for (i = 0; i < numElementDOFs; ++i) {
     727            0 :                     I_nd = global_dof_ndindices[i];
     728              : 
     729              :                     // Handle boundary DOFs
     730              :                     // If Zero Dirichlet BCs, skip this DOF
     731              :                     // If Constant Dirichlet BCs, identity
     732            0 :                     if ((bcType == CONSTANT_FACE) && (this->isDOFOnBoundary(I_nd))) {
     733            0 :                         for (unsigned d = 0; d < Dim; ++d) {
     734            0 :                             I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
     735              :                         }
     736            0 :                         apply(resultView, I_nd) =  apply(view, I_nd);
     737            0 :                         continue;
     738            0 :                     } else if ((bcType == ZERO_FACE) && (this->isDOFOnBoundary(I_nd))) {
     739            0 :                         continue;
     740              :                     }
     741              : 
     742              :                     // get the appropriate index for the Kokkos view of the field
     743            0 :                     for (unsigned d = 0; d < Dim; ++d) {
     744            0 :                         I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
     745              :                     }
     746              : 
     747            0 :                     for (j = 0; j < numElementDOFs; ++j) {
     748            0 :                         J_nd = global_dof_ndindices[j];
     749              : 
     750            0 :                         if (global_dofs[i] <= global_dofs[j]) {
     751            0 :                             continue;
     752              :                         }
     753              : 
     754              :                         // Skip boundary DOFs (Zero & Constant Dirichlet BCs)
     755            0 :                         if (((bcType == ZERO_FACE) || (bcType == CONSTANT_FACE)) 
     756            0 :                             && this->isDOFOnBoundary(J_nd)) {
     757            0 :                             continue;
     758              :                         }
     759              : 
     760              :                         // get the appropriate index for the Kokkos view of the field
     761            0 :                         for (unsigned d = 0; d < Dim; ++d) {
     762            0 :                             J_nd[d] = J_nd[d] - ldom[d].first() + nghost;
     763              :                         }
     764              : 
     765            0 :                         apply(resultView, I_nd) += A_K[i][j] * apply(view, J_nd);
     766              :                     }
     767              :                 }
     768            0 :             });
     769            0 :         IpplTimings::stopTimer(outer_loop);
     770              : 
     771            0 :         if (bcType == PERIODIC_FACE) {
     772            0 :             resultField.accumulateHalo();
     773            0 :             bcField.apply(resultField);
     774            0 :             bcField.assignGhostToPhysical(resultField);
     775              :         } else {
     776            0 :             resultField.accumulateHalo_noghost();
     777              :         }
     778              : 
     779            0 :         IpplTimings::stopTimer(evalAx);
     780              : 
     781            0 :         return resultField;
     782            0 :     }
     783              : 
     784              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     785              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     786              :     template <typename F>
     787            0 :     FieldLHS LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     788              :                            FieldRHS>::evaluateAx_upperlower(FieldLHS& field, F& evalFunction) const {
     789            0 :         Inform m("");
     790              : 
     791              :         // start a timer
     792            0 :         static IpplTimings::TimerRef evalAx = IpplTimings::getTimer("evaluateAx");
     793            0 :         IpplTimings::startTimer(evalAx);
     794              : 
     795              :         // get number of ghost cells in field
     796            0 :         const int nghost = field.getNghost();
     797              : 
     798              :         // create a new field for result with view initialized to zero (views are initialized to
     799              :         // zero by default)
     800            0 :         FieldLHS resultField(field.get_mesh(), field.getLayout(), nghost);
     801              : 
     802              :         // List of quadrature weights
     803            0 :         const Vector<T, QuadratureType::numElementNodes> w =
     804            0 :             this->quadrature_m.getWeightsForRefElement();
     805              : 
     806              :         // List of quadrature nodes
     807            0 :         const Vector<point_t, QuadratureType::numElementNodes> q =
     808            0 :             this->quadrature_m.getIntegrationNodesForRefElement();
     809              : 
     810              :         // TODO move outside of evaluateAx (I think it is possible for other problems as well)
     811              :         // Gradients of the basis functions for the DOF at the quadrature nodes
     812            0 :         Vector<Vector<point_t, numElementDOFs>, QuadratureType::numElementNodes> grad_b_q;
     813            0 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     814            0 :             for (size_t i = 0; i < numElementDOFs; ++i) {
     815            0 :                 grad_b_q[k][i] = this->evaluateRefElementShapeFunctionGradient(i, q[k]);
     816              :             }
     817              :         }
     818              : 
     819              :         // Make local element matrix -- does not change through the element mesh
     820              :         // Element matrix
     821            0 :         Vector<Vector<T, numElementDOFs>, numElementDOFs> A_K;
     822              : 
     823              :         // 1. Compute the Galerkin element matrix A_K
     824            0 :         for (size_t i = 0; i < numElementDOFs; ++i) {
     825            0 :             for (size_t j = 0; j < numElementDOFs; ++j) {
     826            0 :                 A_K[i][j] = 0.0;
     827            0 :                 for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     828            0 :                     A_K[i][j] += w[k] * evalFunction(i, j, grad_b_q[k]);
     829              :                 }
     830              :             }
     831              :         }
     832              : 
     833              :         // Get field data and atomic result data,
     834              :         // since it will be added to during the kokkos loop
     835            0 :         ViewType view             = field.getView();
     836            0 :         AtomicViewType resultView = resultField.getView();
     837              : 
     838              :         // Get boundary conditions from field
     839            0 :         BConds<FieldLHS, Dim>& bcField = field.getFieldBC();
     840            0 :         FieldBC bcType = bcField[0]->getBCType();
     841              : 
     842              :         // Get domain information
     843            0 :         auto ldom = (field.getLayout()).getLocalNDIndex();
     844              : 
     845              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
     846              :         using policy_type = Kokkos::RangePolicy<exec_space>;
     847              : 
     848              :         // start a timer
     849            0 :         static IpplTimings::TimerRef outer_loop = IpplTimings::getTimer("evaluateAx: outer loop");
     850            0 :         IpplTimings::startTimer(outer_loop);
     851              : 
     852              :         // Loop over elements to compute contributions
     853            0 :         Kokkos::parallel_for(
     854            0 :             "Loop over elements", policy_type(0, elementIndices.extent(0)),
     855            0 :             KOKKOS_CLASS_LAMBDA(const size_t index) {
     856            0 :                 const size_t elementIndex                            = elementIndices(index);
     857            0 :                 const Vector<size_t, numElementDOFs> local_dof = this->getLocalDOFIndices();
     858            0 :                 const Vector<size_t, numElementDOFs> global_dofs =
     859            0 :                     this->getGlobalDOFIndices(elementIndex);
     860            0 :                 Vector<indices_t, numElementDOFs> global_dof_ndindices;
     861              : 
     862            0 :                 for (size_t i = 0; i < numElementDOFs; ++i) {
     863            0 :                     global_dof_ndindices[i] = this->getMeshVertexNDIndex(global_dofs[i]);
     864              :                 }
     865              : 
     866              :                 // local DOF indices
     867              :                 size_t i, j;
     868              : 
     869              :                 // global DOF n-dimensional indices (Vector of N indices representing indices in
     870              :                 // each dimension)
     871            0 :                 indices_t I_nd, J_nd;
     872              : 
     873              :                 // 2. Compute the contribution to resultAx = A*x with A_K
     874            0 :                 for (i = 0; i < numElementDOFs; ++i) {
     875            0 :                     I_nd = global_dof_ndindices[i];
     876              : 
     877              :                     // Handle boundary DOFs
     878              :                     // If Zero Dirichlet BCs, skip this DOF
     879              :                     // If Constant Dirichlet BCs, identity
     880            0 :                     if ((bcType == CONSTANT_FACE) && (this->isDOFOnBoundary(I_nd))) {
     881            0 :                         for (unsigned d = 0; d < Dim; ++d) {
     882            0 :                             I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
     883              :                         }
     884            0 :                         apply(resultView, I_nd) =  apply(view, I_nd);
     885            0 :                         continue;
     886            0 :                     } else if ((bcType == ZERO_FACE) && (this->isDOFOnBoundary(I_nd))) {
     887            0 :                         continue;
     888              :                     }
     889              : 
     890              :                     // get the appropriate index for the Kokkos view of the field
     891            0 :                     for (unsigned d = 0; d < Dim; ++d) {
     892            0 :                         I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
     893              :                     }
     894              : 
     895            0 :                     for (j = 0; j < numElementDOFs; ++j) {
     896            0 :                         J_nd = global_dof_ndindices[j];
     897              : 
     898            0 :                         if (global_dofs[i] == global_dofs[j]) {
     899            0 :                             continue;
     900              :                         }
     901              : 
     902              :                         // Skip boundary DOFs (Zero & Constant Dirichlet BCs)
     903            0 :                         if (((bcType == ZERO_FACE) || (bcType == CONSTANT_FACE)) 
     904            0 :                             && this->isDOFOnBoundary(J_nd)) {
     905            0 :                             continue;
     906              :                         }
     907              : 
     908              :                         // get the appropriate index for the Kokkos view of the field
     909            0 :                         for (unsigned d = 0; d < Dim; ++d) {
     910            0 :                             J_nd[d] = J_nd[d] - ldom[d].first() + nghost;
     911              :                         }
     912              : 
     913            0 :                         apply(resultView, I_nd) += A_K[i][j] * apply(view, J_nd);
     914              :                     }
     915              :                 }
     916            0 :             });
     917            0 :         IpplTimings::stopTimer(outer_loop);
     918              : 
     919            0 :         if (bcType == PERIODIC_FACE) {
     920            0 :             resultField.accumulateHalo();
     921            0 :             bcField.apply(resultField);
     922            0 :             bcField.assignGhostToPhysical(resultField);
     923              :         } else {
     924            0 :             resultField.accumulateHalo_noghost();
     925              :         }
     926              : 
     927            0 :         IpplTimings::stopTimer(evalAx);
     928              : 
     929            0 :         return resultField;
     930            0 :     }
     931              : 
     932              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     933              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     934              :     template <typename F>
     935            0 :     FieldLHS LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     936              :                            FieldRHS>::evaluateAx_inversediag(FieldLHS& field, F& evalFunction) const {
     937            0 :         Inform m("");
     938              : 
     939              :         // start a timer
     940            0 :         static IpplTimings::TimerRef evalAx = IpplTimings::getTimer("evaluateAx");
     941            0 :         IpplTimings::startTimer(evalAx);
     942              : 
     943              :         // get number of ghost cells in field
     944            0 :         const int nghost = field.getNghost();
     945              : 
     946              :         // create a new field for result with view initialized to zero (views are initialized to
     947              :         // zero by default)
     948            0 :         FieldLHS resultField(field.get_mesh(), field.getLayout(), nghost);
     949              : 
     950              :         // List of quadrature weights
     951            0 :         const Vector<T, QuadratureType::numElementNodes> w =
     952            0 :             this->quadrature_m.getWeightsForRefElement();
     953              : 
     954              :         // List of quadrature nodes
     955            0 :         const Vector<point_t, QuadratureType::numElementNodes> q =
     956            0 :             this->quadrature_m.getIntegrationNodesForRefElement();
     957              : 
     958              :         // TODO move outside of evaluateAx (I think it is possible for other problems as well)
     959              :         // Gradients of the basis functions for the DOF at the quadrature nodes
     960            0 :         Vector<Vector<point_t, numElementDOFs>, QuadratureType::numElementNodes> grad_b_q;
     961            0 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     962            0 :             for (size_t i = 0; i < numElementDOFs; ++i) {
     963            0 :                 grad_b_q[k][i] = this->evaluateRefElementShapeFunctionGradient(i, q[k]);
     964              :             }
     965              :         }
     966              : 
     967              :         // Make local element matrix -- does not change through the element mesh
     968              :         // Element matrix
     969            0 :         Vector<Vector<T, numElementDOFs>, numElementDOFs> A_K;
     970              : 
     971              :         // 1. Compute the Galerkin element matrix A_K
     972            0 :         for (size_t i = 0; i < numElementDOFs; ++i) {
     973            0 :             for (size_t j = 0; j < numElementDOFs; ++j) {
     974            0 :                 A_K[i][j] = 0.0;
     975            0 :                 for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     976            0 :                     A_K[i][j] += w[k] * evalFunction(i, j, grad_b_q[k]);
     977              :                 }
     978              :             }
     979              :         }
     980              : 
     981              :         // Get field data and atomic result data,
     982              :         // since it will be added to during the kokkos loop
     983            0 :         ViewType view             = field.getView();
     984            0 :         AtomicViewType resultView = resultField.getView();
     985              : 
     986              :         // Get boundary conditions from field
     987            0 :         BConds<FieldLHS, Dim>& bcField = field.getFieldBC();
     988            0 :         FieldBC bcType = bcField[0]->getBCType();
     989              : 
     990              :         // Get domain information
     991            0 :         auto ldom = (field.getLayout()).getLocalNDIndex();
     992              : 
     993              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
     994              :         using policy_type = Kokkos::RangePolicy<exec_space>;
     995              : 
     996              :         // start a timer
     997            0 :         static IpplTimings::TimerRef outer_loop = IpplTimings::getTimer("evaluateAx: outer loop");
     998            0 :         IpplTimings::startTimer(outer_loop);
     999              : 
    1000              :         // Loop over elements to compute contributions
    1001            0 :         Kokkos::parallel_for(
    1002            0 :             "Loop over elements", policy_type(0, elementIndices.extent(0)),
    1003            0 :             KOKKOS_CLASS_LAMBDA(const size_t index) {
    1004            0 :                 const size_t elementIndex                            = elementIndices(index);
    1005            0 :                 const Vector<size_t, numElementDOFs> local_dof = this->getLocalDOFIndices();
    1006            0 :                 const Vector<size_t, numElementDOFs> global_dofs =
    1007            0 :                     this->getGlobalDOFIndices(elementIndex);
    1008            0 :                 Vector<indices_t, numElementDOFs> global_dof_ndindices;
    1009              : 
    1010            0 :                 for (size_t i = 0; i < numElementDOFs; ++i) {
    1011            0 :                     global_dof_ndindices[i] = this->getMeshVertexNDIndex(global_dofs[i]);
    1012              :                 }
    1013              : 
    1014              :                 // local DOF indices
    1015              :                 size_t i, j;
    1016              : 
    1017              :                 // global DOF n-dimensional indices (Vector of N indices representing indices in
    1018              :                 // each dimension)
    1019            0 :                 indices_t I_nd, J_nd;
    1020              : 
    1021              :                 // 2. Compute the contribution to resultAx = A*x with A_K
    1022            0 :                 for (i = 0; i < numElementDOFs; ++i) {
    1023            0 :                     I_nd = global_dof_ndindices[i];
    1024              : 
    1025              :                     // Handle boundary DOFs
    1026              :                     // If Zero Dirichlet BCs, skip this DOF
    1027              :                     // If Constant Dirichlet BCs, identity
    1028            0 :                     if ((bcType == CONSTANT_FACE) && (this->isDOFOnBoundary(I_nd))) {
    1029            0 :                         for (unsigned d = 0; d < Dim; ++d) {
    1030            0 :                             I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
    1031              :                         }
    1032            0 :                         apply(resultView, I_nd) = 1.0;
    1033            0 :                         continue;
    1034            0 :                     } else if ((bcType == ZERO_FACE) && (this->isDOFOnBoundary(I_nd))) {
    1035            0 :                         continue;
    1036              :                     }
    1037              : 
    1038              :                     // get the appropriate index for the Kokkos view of the field
    1039            0 :                     for (unsigned d = 0; d < Dim; ++d) {
    1040            0 :                         I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
    1041              :                     }
    1042              : 
    1043            0 :                     for (j = 0; j < numElementDOFs; ++j) {
    1044            0 :                         if (global_dofs[i] == global_dofs[j]) {
    1045            0 :                             J_nd = global_dof_ndindices[j];
    1046              : 
    1047              :                             // Skip boundary DOFs (Zero & Constant Dirichlet BCs)
    1048            0 :                             if (((bcType == ZERO_FACE) || (bcType == CONSTANT_FACE)) 
    1049            0 :                                 && this->isDOFOnBoundary(J_nd)) {
    1050            0 :                                 continue;
    1051              :                             }
    1052              : 
    1053              :                             // get the appropriate index for the Kokkos view of the field
    1054            0 :                             for (unsigned d = 0; d < Dim; ++d) {
    1055            0 :                                 J_nd[d] = J_nd[d] - ldom[d].first() + nghost;
    1056              :                             }
    1057              : 
    1058              :                             // sum up all contributions of element matrix
    1059            0 :                             apply(resultView, I_nd) += A_K[i][j];
    1060              :                         }
    1061              :                     }
    1062              :                 }
    1063            0 :             });
    1064              : 
    1065            0 :         if (bcType == PERIODIC_FACE) {
    1066            0 :             resultField.accumulateHalo();
    1067            0 :             bcField.apply(resultField);
    1068            0 :             bcField.assignGhostToPhysical(resultField);
    1069              :         } else {
    1070            0 :             resultField.accumulateHalo_noghost();
    1071              :         }
    1072              : 
    1073              :         // apply the inverse diagonal after already summed all contributions from element matrices
    1074              :         using index_array_type = typename RangePolicy<Dim, exec_space>::index_array_type;
    1075            0 :         ippl::parallel_for("Loop over result view to apply inverse", field.getFieldRangePolicy(),
    1076            0 :             KOKKOS_LAMBDA(const index_array_type& args) {
    1077            0 :                 if (apply(resultView, args) != 0.0) {
    1078            0 :                     apply(resultView, args) = (1.0 / apply(resultView, args)) * apply(view, args);
    1079              :                 }
    1080              :             });
    1081            0 :         IpplTimings::stopTimer(outer_loop);
    1082              : 
    1083            0 :         IpplTimings::stopTimer(evalAx);
    1084              : 
    1085            0 :         return resultField;
    1086            0 :     }
    1087              : 
    1088              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
    1089              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
    1090              :     template <typename F>
    1091            0 :     FieldLHS LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
    1092              :                            FieldRHS>::evaluateAx_diag(FieldLHS& field, F& evalFunction) const {
    1093            0 :         Inform m("");
    1094              : 
    1095              :         // start a timer
    1096            0 :         static IpplTimings::TimerRef evalAx = IpplTimings::getTimer("evaluateAx");
    1097            0 :         IpplTimings::startTimer(evalAx);
    1098              : 
    1099              :         // get number of ghost cells in field
    1100            0 :         const int nghost = field.getNghost();
    1101              : 
    1102              :         // create a new field for result with view initialized to zero (views are initialized to
    1103              :         // zero by default)
    1104            0 :         FieldLHS resultField(field.get_mesh(), field.getLayout(), nghost);
    1105              : 
    1106              :         // List of quadrature weights
    1107            0 :         const Vector<T, QuadratureType::numElementNodes> w =
    1108            0 :             this->quadrature_m.getWeightsForRefElement();
    1109              : 
    1110              :         // List of quadrature nodes
    1111            0 :         const Vector<point_t, QuadratureType::numElementNodes> q =
    1112            0 :             this->quadrature_m.getIntegrationNodesForRefElement();
    1113              : 
    1114              :         // TODO move outside of evaluateAx (I think it is possible for other problems as well)
    1115              :         // Gradients of the basis functions for the DOF at the quadrature nodes
    1116            0 :         Vector<Vector<point_t, numElementDOFs>, QuadratureType::numElementNodes> grad_b_q;
    1117            0 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
    1118            0 :             for (size_t i = 0; i < numElementDOFs; ++i) {
    1119            0 :                 grad_b_q[k][i] = this->evaluateRefElementShapeFunctionGradient(i, q[k]);
    1120              :             }
    1121              :         }
    1122              : 
    1123              :         // Make local element matrix -- does not change through the element mesh
    1124              :         // Element matrix
    1125            0 :         Vector<Vector<T, numElementDOFs>, numElementDOFs> A_K;
    1126              : 
    1127              :         // 1. Compute the Galerkin element matrix A_K
    1128            0 :         for (size_t i = 0; i < numElementDOFs; ++i) {
    1129            0 :             for (size_t j = 0; j < numElementDOFs; ++j) {
    1130            0 :                 A_K[i][j] = 0.0;
    1131            0 :                 for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
    1132            0 :                     A_K[i][j] += w[k] * evalFunction(i, j, grad_b_q[k]);
    1133              :                 }
    1134              :             }
    1135              :         }
    1136              : 
    1137              :         // Get field data and atomic result data,
    1138              :         // since it will be added to during the kokkos loop
    1139            0 :         ViewType view             = field.getView();
    1140            0 :         AtomicViewType resultView = resultField.getView();
    1141              : 
    1142              :         // Get boundary conditions from field
    1143            0 :         BConds<FieldLHS, Dim>& bcField = field.getFieldBC();
    1144            0 :         FieldBC bcType = bcField[0]->getBCType();
    1145              : 
    1146              :         // Get domain information
    1147            0 :         auto ldom = (field.getLayout()).getLocalNDIndex();
    1148              : 
    1149              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
    1150              :         using policy_type = Kokkos::RangePolicy<exec_space>;
    1151              : 
    1152              :         // start a timer
    1153            0 :         static IpplTimings::TimerRef outer_loop = IpplTimings::getTimer("evaluateAx: outer loop");
    1154            0 :         IpplTimings::startTimer(outer_loop);
    1155              : 
    1156              :         // Loop over elements to compute contributions
    1157            0 :         Kokkos::parallel_for(
    1158            0 :             "Loop over elements", policy_type(0, elementIndices.extent(0)),
    1159            0 :             KOKKOS_CLASS_LAMBDA(const size_t index) {
    1160            0 :                 const size_t elementIndex                            = elementIndices(index);
    1161            0 :                 const Vector<size_t, numElementDOFs> local_dof = this->getLocalDOFIndices();
    1162            0 :                 const Vector<size_t, numElementDOFs> global_dofs =
    1163            0 :                     this->getGlobalDOFIndices(elementIndex);
    1164            0 :                 Vector<indices_t, numElementDOFs> global_dof_ndindices;
    1165              : 
    1166            0 :                 for (size_t i = 0; i < numElementDOFs; ++i) {
    1167            0 :                     global_dof_ndindices[i] = this->getMeshVertexNDIndex(global_dofs[i]);
    1168              :                 }
    1169              : 
    1170              :                 // local DOF indices
    1171              :                 size_t i, j;
    1172              : 
    1173              :                 // global DOF n-dimensional indices (Vector of N indices representing indices in
    1174              :                 // each dimension)
    1175            0 :                 indices_t I_nd, J_nd;
    1176              : 
    1177              :                 // 2. Compute the contribution to resultAx = A*x with A_K
    1178            0 :                 for (i = 0; i < numElementDOFs; ++i) {
    1179            0 :                     I_nd = global_dof_ndindices[i];
    1180              : 
    1181              :                     // Handle boundary DOFs
    1182              :                     // If Zero Dirichlet BCs, skip this DOF
    1183              :                     // If Constant Dirichlet BCs, identity
    1184            0 :                     if ((bcType == CONSTANT_FACE) && (this->isDOFOnBoundary(I_nd))) {
    1185            0 :                         for (unsigned d = 0; d < Dim; ++d) {
    1186            0 :                             I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
    1187              :                         }
    1188            0 :                         apply(resultView, I_nd) =  apply(view, I_nd);
    1189            0 :                         continue;
    1190            0 :                     } else if ((bcType == ZERO_FACE) && (this->isDOFOnBoundary(I_nd))) {
    1191            0 :                         continue;
    1192              :                     }
    1193              : 
    1194              :                     // get the appropriate index for the Kokkos view of the field
    1195            0 :                     for (unsigned d = 0; d < Dim; ++d) {
    1196            0 :                         I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
    1197              :                     }
    1198              : 
    1199            0 :                     for (j = 0; j < numElementDOFs; ++j) {
    1200            0 :                         if (global_dofs[i] == global_dofs[j]) {
    1201            0 :                             J_nd = global_dof_ndindices[j];
    1202              : 
    1203              :                             // Skip boundary DOFs (Zero & Constant Dirichlet BCs)
    1204            0 :                             if (((bcType == ZERO_FACE) || (bcType == CONSTANT_FACE)) 
    1205            0 :                                 && this->isDOFOnBoundary(J_nd)) {
    1206            0 :                                 continue;
    1207              :                             }
    1208              : 
    1209              :                             // get the appropriate index for the Kokkos view of the field
    1210            0 :                             for (unsigned d = 0; d < Dim; ++d) {
    1211            0 :                                 J_nd[d] = J_nd[d] - ldom[d].first() + nghost;
    1212              :                             }
    1213              : 
    1214            0 :                             apply(resultView, I_nd) += A_K[i][j] * apply(view, J_nd);
    1215              :                         }
    1216              :                     }
    1217              :                 }
    1218            0 :             });
    1219            0 :         IpplTimings::stopTimer(outer_loop);
    1220              : 
    1221            0 :         if (bcType == PERIODIC_FACE) {
    1222            0 :             resultField.accumulateHalo();
    1223            0 :             bcField.apply(resultField);
    1224            0 :             bcField.assignGhostToPhysical(resultField);
    1225              :         } else {
    1226            0 :             resultField.accumulateHalo_noghost();
    1227              :         }
    1228              : 
    1229            0 :         IpplTimings::stopTimer(evalAx);
    1230              : 
    1231            0 :         return resultField;
    1232            0 :     }
    1233              : 
    1234              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
    1235              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
    1236              :     template <typename F>
    1237            0 :     FieldLHS LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
    1238              :                            FieldRHS>::evaluateAx_lift(FieldLHS& field, F& evalFunction) const {
    1239            0 :         Inform m("");
    1240              : 
    1241              :         // get number of ghost cells in field
    1242            0 :         const int nghost = field.getNghost();
    1243              : 
    1244              :         // create a new field for result with view initialized to zero (views are initialized to
    1245              :         // zero by default)
    1246            0 :         FieldLHS resultField(field.get_mesh(), field.getLayout(), nghost);
    1247              : 
    1248              :         // List of quadrature weights
    1249            0 :         const Vector<T, QuadratureType::numElementNodes> w =
    1250            0 :             this->quadrature_m.getWeightsForRefElement();
    1251              : 
    1252              :         // List of quadrature nodes
    1253            0 :         const Vector<point_t, QuadratureType::numElementNodes> q =
    1254            0 :             this->quadrature_m.getIntegrationNodesForRefElement();
    1255              : 
    1256              :         // TODO move outside of evaluateAx (I think it is possible for other problems as well)
    1257              :         // Gradients of the basis functions for the DOF at the quadrature nodes
    1258            0 :         Vector<Vector<point_t, numElementDOFs>, QuadratureType::numElementNodes> grad_b_q;
    1259            0 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
    1260            0 :             for (size_t i = 0; i < numElementDOFs; ++i) {
    1261            0 :                 grad_b_q[k][i] = this->evaluateRefElementShapeFunctionGradient(i, q[k]);
    1262              :             }
    1263              :         }
    1264              : 
    1265              :         // Make local element matrix -- does not change through the element mesh
    1266              :         // Element matrix
    1267            0 :         Vector<Vector<T, numElementDOFs>, numElementDOFs> A_K;
    1268              : 
    1269              :         // 1. Compute the Galerkin element matrix A_K
    1270            0 :         for (size_t i = 0; i < numElementDOFs; ++i) {
    1271            0 :             for (size_t j = 0; j < numElementDOFs; ++j) {
    1272            0 :                 A_K[i][j] = 0.0;
    1273            0 :                 for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
    1274            0 :                     A_K[i][j] += w[k] * evalFunction(i, j, grad_b_q[k]);
    1275              :                 }
    1276              :             }
    1277              :         }
    1278              : 
    1279              :         // Get field data and atomic result data,
    1280              :         // since it will be added to during the kokkos loop
    1281            0 :         ViewType view             = field.getView();
    1282            0 :         AtomicViewType resultView = resultField.getView();
    1283              : 
    1284              :         // Get domain information
    1285            0 :         auto ldom = (field.getLayout()).getLocalNDIndex();
    1286              : 
    1287              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
    1288              :         using policy_type = Kokkos::RangePolicy<exec_space>;
    1289              : 
    1290              :         // Loop over elements to compute contributions
    1291            0 :         Kokkos::parallel_for(
    1292            0 :             "Loop over elements", policy_type(0, elementIndices.extent(0)),
    1293            0 :             KOKKOS_CLASS_LAMBDA(const size_t index) {
    1294            0 :                 const size_t elementIndex                            = elementIndices(index);
    1295            0 :                 const Vector<size_t, numElementDOFs> local_dof = this->getLocalDOFIndices();
    1296            0 :                 const Vector<size_t, numElementDOFs> global_dofs =
    1297            0 :                     this->getGlobalDOFIndices(elementIndex);
    1298            0 :                 Vector<indices_t, numElementDOFs> global_dof_ndindices;
    1299              : 
    1300            0 :                 for (size_t i = 0; i < numElementDOFs; ++i) {
    1301            0 :                     global_dof_ndindices[i] = this->getMeshVertexNDIndex(global_dofs[i]);
    1302              :                 }
    1303              : 
    1304              :                 // local DOF indices (both i and j go from 0 to numDOFs-1 in the element)
    1305              :                 size_t i, j;
    1306              : 
    1307              :                 // global DOF n-dimensional indices (Vector of N indices representing indices in
    1308              :                 // each dimension)
    1309            0 :                 indices_t I_nd, J_nd;
    1310              : 
    1311              :                 // 2. Compute the contribution to resultAx = A*x with A_K
    1312            0 :                 for (i = 0; i < numElementDOFs; ++i) {
    1313            0 :                     I_nd = global_dof_ndindices[i];
    1314              : 
    1315              :                     // Skip if on a row of the matrix
    1316            0 :                     if (this->isDOFOnBoundary(I_nd)) {
    1317            0 :                         continue;
    1318              :                     }
    1319              : 
    1320              :                     // get the appropriate index for the Kokkos view of the field
    1321            0 :                     for (unsigned d = 0; d < Dim; ++d) {
    1322            0 :                         I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
    1323              :                     }
    1324              : 
    1325            0 :                     for (j = 0; j < numElementDOFs; ++j) {
    1326            0 :                         J_nd = global_dof_ndindices[j];
    1327              : 
    1328              :                         // Contribute to lifting only if on a boundary DOF
    1329            0 :                         if (this->isDOFOnBoundary(J_nd)) {
    1330              :                             // get the appropriate index for the Kokkos view of the field
    1331            0 :                             for (unsigned d = 0; d < Dim; ++d) {
    1332            0 :                                 J_nd[d] = J_nd[d] - ldom[d].first() + nghost;
    1333              :                             }
    1334            0 :                             apply(resultView, I_nd) += A_K[i][j] * apply(view, J_nd);
    1335            0 :                             continue;
    1336            0 :                         }
    1337              : 
    1338              :                     }
    1339              :                 }
    1340            0 :             });
    1341            0 :         resultField.accumulateHalo();
    1342              : 
    1343            0 :         return resultField;
    1344            0 :     }
    1345              : 
    1346              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
    1347              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
    1348           20 :     void LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
    1349              :                        FieldRHS>::evaluateLoadVector(FieldRHS& field) const {
    1350           20 :         Inform m("");
    1351              : 
    1352              :         // start a timer
    1353           20 :         static IpplTimings::TimerRef evalLoadV = IpplTimings::getTimer("evaluateLoadVector");
    1354           20 :         IpplTimings::startTimer(evalLoadV);
    1355              : 
    1356              :         // List of quadrature weights
    1357           20 :         const Vector<T, QuadratureType::numElementNodes> w =
    1358           20 :             this->quadrature_m.getWeightsForRefElement();
    1359              : 
    1360              :         // List of quadrature nodes
    1361           20 :         const Vector<point_t, QuadratureType::numElementNodes> q =
    1362           20 :             this->quadrature_m.getIntegrationNodesForRefElement();
    1363              : 
    1364           20 :         const indices_t zeroNdIndex = Vector<size_t, Dim>(0);
    1365              : 
    1366              :         // Evaluate the basis functions for the DOF at the quadrature nodes
    1367           20 :         Vector<Vector<T, numElementDOFs>, QuadratureType::numElementNodes> basis_q;
    1368          648 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
    1369         5108 :             for (size_t i = 0; i < numElementDOFs; ++i) {
    1370         4480 :                 basis_q[k][i] = this->evaluateRefElementShapeFunction(i, q[k]);
    1371              :             }
    1372              :         }
    1373              : 
    1374              :         // Absolute value of det Phi_K
    1375           20 :         const T absDetDPhi = Kokkos::abs(this->ref_element_m.getDeterminantOfTransformationJacobian(
    1376           40 :             this->getElementMeshVertexPoints(zeroNdIndex)));
    1377              : 
    1378              :         // Get domain information and ghost cells
    1379           20 :         auto ldom        = (field.getLayout()).getLocalNDIndex();
    1380           20 :         const int nghost = field.getNghost();
    1381              : 
    1382              :         // Get boundary conditions from field
    1383           20 :         BConds<FieldRHS, Dim>& bcField = field.getFieldBC();
    1384           20 :         FieldBC bcType = bcField[0]->getBCType();
    1385              : 
    1386           20 :         FieldRHS temp_field(field.get_mesh(), field.getLayout(), nghost);
    1387           20 :         temp_field.setFieldBC(bcField);
    1388              : 
    1389              :         // Get field data and make it atomic,
    1390              :         // since it will be added to during the kokkos loop
    1391              :         // We work with a temporary field since we need to use field
    1392              :         // to evaluate the load vector; then we assign temp to RHS field
    1393           20 :         AtomicViewType atomic_view = temp_field.getView();
    1394              : 
    1395              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
    1396              :         using policy_type = Kokkos::RangePolicy<exec_space>;
    1397              : 
    1398              :         // start a timer
    1399           20 :         static IpplTimings::TimerRef outer_loop =
    1400           20 :             IpplTimings::getTimer("evaluateLoadVec: outer loop");
    1401           20 :         IpplTimings::startTimer(outer_loop);
    1402              : 
    1403              :         // Loop over elements to compute contributions
    1404           20 :         Kokkos::parallel_for(
    1405           40 :             "Loop over elements", policy_type(0, elementIndices.extent(0)),
    1406          344 :             KOKKOS_CLASS_LAMBDA(size_t index) {
    1407            0 :                 const size_t elementIndex                              = elementIndices(index);
    1408          304 :                 const Vector<size_t, numElementDOFs> local_dofs  = this->getLocalDOFIndices();
    1409            0 :                 const Vector<size_t, numElementDOFs> global_dofs =
    1410          304 :                     this->getGlobalDOFIndices(elementIndex);
    1411              : 
    1412              :                 size_t i, I;
    1413              : 
    1414              :                 // 1. Compute b_K
    1415         4720 :                 for (i = 0; i < numElementDOFs; ++i) {
    1416            0 :                     I = global_dofs[i];
    1417              : 
    1418              :                     // TODO fix for higher order
    1419         2208 :                     auto dof_ndindex_I = this->getMeshVertexNDIndex(I);
    1420              : 
    1421              :                     // Skip boundary DOFs (Zero and Constant Dirichlet BCs)
    1422            0 :                     if (((bcType == ZERO_FACE) || (bcType == CONSTANT_FACE))
    1423         2208 :                         && (this->isDOFOnBoundary(dof_ndindex_I))) {
    1424            0 :                         continue;
    1425              :                     }
    1426              : 
    1427              :                     // calculate the contribution of this element
    1428            0 :                     T contrib = 0;
    1429        57264 :                     for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
    1430            0 :                         T val = 0;
    1431       499104 :                         for (size_t j = 0; j < numElementDOFs; ++j) {
    1432              :                             // get field index corresponding to this DOF
    1433            0 :                             size_t J           = global_dofs[j];
    1434       442800 :                             auto dof_ndindex_J = this->getMeshVertexNDIndex(J);
    1435      1763712 :                             for (unsigned d = 0; d < Dim; ++d) {
    1436            0 :                                 dof_ndindex_J[d] = dof_ndindex_J[d] - ldom[d].first() + nghost;
    1437              :                             }
    1438              : 
    1439              :                             // get field value at DOF and interpolate to q_k
    1440       442800 :                             val += basis_q[k][j] * apply(field, dof_ndindex_J);
    1441              :                         }
    1442              : 
    1443            0 :                         contrib += w[k] * basis_q[k][i] * absDetDPhi * val;
    1444              :                     }
    1445              : 
    1446              :                     // get the appropriate index for the Kokkos view of the field
    1447         3720 :                     for (unsigned d = 0; d < Dim; ++d) {
    1448            0 :                         dof_ndindex_I[d] = dof_ndindex_I[d] - ldom[d].first() + nghost;
    1449              :                     }
    1450              : 
    1451              :                     // add the contribution of the element to the field
    1452          960 :                     apply(atomic_view, dof_ndindex_I) += contrib;
    1453              : 
    1454              :                 }
    1455            0 :             });
    1456           20 :         IpplTimings::stopTimer(outer_loop);
    1457              : 
    1458           20 :         temp_field.accumulateHalo();
    1459              : 
    1460           20 :         if ((bcType == PERIODIC_FACE) || (bcType == CONSTANT_FACE)) {
    1461            0 :             bcField.apply(temp_field);
    1462            0 :             bcField.assignGhostToPhysical(temp_field);
    1463              :         }
    1464              : 
    1465           20 :         field = temp_field;
    1466              : 
    1467           20 :         IpplTimings::stopTimer(evalLoadV);
    1468           20 :     }
    1469              : 
    1470              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
    1471              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
    1472              :     template <typename F>
    1473            0 :     T LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS, FieldRHS>::computeErrorL2(
    1474              :         const FieldLHS& u_h, const F& u_sol) const {
    1475            0 :         if (this->quadrature_m.getOrder() < (2 * Order + 1)) {
    1476              :             // throw exception
    1477            0 :             throw IpplException(
    1478              :                 "LagrangeSpace::computeErrorL2()",
    1479              :                 "Order of quadrature rule for error computation should be > 2*p + 1");
    1480              :         }
    1481              : 
    1482              :         // List of quadrature weights
    1483            0 :         const Vector<T, QuadratureType::numElementNodes> w =
    1484            0 :             this->quadrature_m.getWeightsForRefElement();
    1485              : 
    1486              :         // List of quadrature nodes
    1487            0 :         const Vector<point_t, QuadratureType::numElementNodes> q =
    1488            0 :             this->quadrature_m.getIntegrationNodesForRefElement();
    1489              : 
    1490              :         // Evaluate the basis functions for the DOF at the quadrature nodes
    1491            0 :         Vector<Vector<T, numElementDOFs>, QuadratureType::numElementNodes> basis_q;
    1492            0 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
    1493            0 :             for (size_t i = 0; i < numElementDOFs; ++i) {
    1494            0 :                 basis_q[k][i] = this->evaluateRefElementShapeFunction(i, q[k]);
    1495              :             }
    1496              :         }
    1497              : 
    1498            0 :         const indices_t zeroNdIndex = Vector<size_t, Dim>(0);
    1499              : 
    1500              :         // Absolute value of det Phi_K
    1501            0 :         const T absDetDPhi = Kokkos::abs(this->ref_element_m.getDeterminantOfTransformationJacobian(
    1502            0 :             this->getElementMeshVertexPoints(zeroNdIndex)));
    1503              : 
    1504              :         // Variable to sum the error to
    1505            0 :         T error = 0;
    1506              : 
    1507              :         // Get domain information and ghost cells
    1508            0 :         auto ldom        = (u_h.getLayout()).getLocalNDIndex();
    1509            0 :         const int nghost = u_h.getNghost();
    1510              : 
    1511              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
    1512              :         using policy_type = Kokkos::RangePolicy<exec_space>;
    1513              : 
    1514              :         // Loop over elements to compute contributions
    1515            0 :         Kokkos::parallel_reduce(
    1516            0 :             "Compute error over elements", policy_type(0, elementIndices.extent(0)),
    1517            0 :             KOKKOS_CLASS_LAMBDA(size_t index, double& local) {
    1518            0 :                 const size_t elementIndex = elementIndices(index);
    1519            0 :                 const Vector<size_t, numElementDOFs> global_dofs =
    1520            0 :                     this->getGlobalDOFIndices(elementIndex);
    1521              : 
    1522              :                 // contribution of this element to the error
    1523            0 :                 T contrib = 0;
    1524            0 :                 for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
    1525            0 :                     T val_u_sol = u_sol(this->ref_element_m.localToGlobal(
    1526            0 :                         this->getElementMeshVertexPoints(this->getElementNDIndex(elementIndex)),
    1527            0 :                         q[k]));
    1528              : 
    1529            0 :                     T val_u_h = 0;
    1530            0 :                     for (size_t i = 0; i < numElementDOFs; ++i) {
    1531              :                         // get field index corresponding to this DOF
    1532            0 :                         size_t I           = global_dofs[i];
    1533            0 :                         auto dof_ndindex_I = this->getMeshVertexNDIndex(I);
    1534            0 :                         for (unsigned d = 0; d < Dim; ++d) {
    1535            0 :                             dof_ndindex_I[d] = dof_ndindex_I[d] - ldom[d].first() + nghost;
    1536              :                         }
    1537              : 
    1538              :                         // get field value at DOF and interpolate to q_k
    1539            0 :                         val_u_h += basis_q[k][i] * apply(u_h, dof_ndindex_I);
    1540              :                     }
    1541              : 
    1542            0 :                     contrib += w[k] * Kokkos::pow(val_u_sol - val_u_h, 2) * absDetDPhi;
    1543              :                 }
    1544            0 :                 local += contrib;
    1545            0 :             },
    1546            0 :             Kokkos::Sum<double>(error));
    1547              : 
    1548              :         // MPI reduce
    1549            0 :         T global_error = 0.0;
    1550            0 :         Comm->allreduce(error, global_error, 1, std::plus<T>());
    1551              : 
    1552            0 :         return Kokkos::sqrt(global_error);
    1553            0 :     }
    1554              : 
    1555              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
    1556              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
    1557            0 :     T LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS, FieldRHS>::computeAvg(
    1558              :         const FieldLHS& u_h) const {
    1559            0 :         if (this->quadrature_m.getOrder() < (2 * Order + 1)) {
    1560              :             // throw exception
    1561            0 :             throw IpplException(
    1562              :                 "LagrangeSpace::computeAvg()",
    1563              :                 "Order of quadrature rule for error computation should be > 2*p + 1");
    1564              :         }
    1565              : 
    1566              :         // List of quadrature weights
    1567            0 :         const Vector<T, QuadratureType::numElementNodes> w =
    1568            0 :             this->quadrature_m.getWeightsForRefElement();
    1569              : 
    1570              :         // List of quadrature nodes
    1571            0 :         const Vector<point_t, QuadratureType::numElementNodes> q =
    1572            0 :             this->quadrature_m.getIntegrationNodesForRefElement();
    1573              : 
    1574              :         // Evaluate the basis functions for the DOF at the quadrature nodes
    1575            0 :         Vector<Vector<T, numElementDOFs>, QuadratureType::numElementNodes> basis_q;
    1576            0 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
    1577            0 :             for (size_t i = 0; i < numElementDOFs; ++i) {
    1578            0 :                 basis_q[k][i] = this->evaluateRefElementShapeFunction(i, q[k]);
    1579              :             }
    1580              :         }
    1581              : 
    1582            0 :         const indices_t zeroNdIndex = Vector<size_t, Dim>(0);
    1583              : 
    1584              :         // Absolute value of det Phi_K
    1585            0 :         const T absDetDPhi = Kokkos::abs(this->ref_element_m.getDeterminantOfTransformationJacobian(
    1586            0 :             this->getElementMeshVertexPoints(zeroNdIndex)));
    1587              : 
    1588              :         // Variable to sum the error to
    1589            0 :         T avg = 0;
    1590              : 
    1591              :         // Get domain information and ghost cells
    1592            0 :         auto ldom        = (u_h.getLayout()).getLocalNDIndex();
    1593            0 :         const int nghost = u_h.getNghost();
    1594              : 
    1595              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
    1596              :         using policy_type = Kokkos::RangePolicy<exec_space>;
    1597              : 
    1598              :         // Loop over elements to compute contributions
    1599            0 :         Kokkos::parallel_reduce(
    1600            0 :             "Compute average over elements", policy_type(0, elementIndices.extent(0)),
    1601            0 :             KOKKOS_CLASS_LAMBDA(size_t index, double& local) {
    1602            0 :                 const size_t elementIndex = elementIndices(index);
    1603            0 :                 const Vector<size_t, numElementDOFs> global_dofs =
    1604            0 :                     this->getGlobalDOFIndices(elementIndex);
    1605              : 
    1606              :                 // contribution of this element to the error
    1607            0 :                 T contrib = 0;
    1608            0 :                 for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
    1609            0 :                     T val_u_h = 0;
    1610            0 :                     for (size_t i = 0; i < numElementDOFs; ++i) {
    1611              :                         // get field index corresponding to this DOF
    1612            0 :                         size_t I           = global_dofs[i];
    1613            0 :                         auto dof_ndindex_I = this->getMeshVertexNDIndex(I);
    1614            0 :                         for (unsigned d = 0; d < Dim; ++d) {
    1615            0 :                             dof_ndindex_I[d] = dof_ndindex_I[d] - ldom[d].first() + nghost;
    1616              :                         }
    1617              : 
    1618              :                         // get field value at DOF and interpolate to q_k
    1619            0 :                         val_u_h += basis_q[k][i] * apply(u_h, dof_ndindex_I);
    1620              :                     }
    1621              : 
    1622            0 :                     contrib += w[k] * val_u_h * absDetDPhi;
    1623              :                 }
    1624            0 :                 local += contrib;
    1625            0 :             },
    1626            0 :             Kokkos::Sum<double>(avg));
    1627              : 
    1628              :         // MPI reduce
    1629            0 :         T global_avg = 0.0;
    1630            0 :         Comm->allreduce(avg, global_avg, 1, std::plus<T>());
    1631              : 
    1632            0 :         return global_avg;
    1633            0 :     }
    1634              : 
    1635              : }  // namespace ippl
         |