[deal.II] Integrating a deal.II-specific function in a NOX-Interface for MPI-threads

'Maxi Miller' via deal.II User Group Fri, 05 Apr 2019 08:47:38 -0700

I tested the integration of NOX into my program, for future nonlinear 
solvers. For that, I wanted to re-use the existing function for assembling 
the right hand side, and wrote the following code for the interface:
template <int dim>
class ProblemInterface : public NOX::Epetra::Interface::Required,
        public NOX::Epetra::Interface::Preconditioner,
        public NOX::Epetra::Interface::Jacobian
{
public:
    ProblemInterface(std::function<void(const TrilinosWrappers::MPI::Vector
&, const TrilinosWrappers::MPI::Vector&, TrilinosWrappers::MPI::Vector&)> 
residual_function,
                     std::function<void(Epetra_CrsMatrix &)> 
jacobian_function,
                     const MPI_Comm &mpi_communicator,
                     const IndexSet &locally_owned_dofs,
                     const IndexSet &locally_relevant_dofs) :
        residual_function(residual_function),
        jacobian_function(jacobian_function),
        mpi_communicator(mpi_communicator),
        locally_owned_dofs(locally_owned_dofs),
        locally_relevant_dofs(locally_relevant_dofs)
    {
        residual_vec.reinit(locally_owned_dofs, locally_relevant_dofs, 
mpi_communicator);
        temp_f_vec.reinit(locally_owned_dofs, locally_relevant_dofs, 
mpi_communicator);
        f_vec.reinit(locally_owned_dofs, locally_relevant_dofs, 
mpi_communicator);


    };

    ~ProblemInterface(){

    };

    bool computeF(const Epetra_Vector &x, Epetra_Vector &FVec, NOX::Epetra::
Interface::Required::FillType)
    {

        Epetra_Vector temp_f_epetra_vec = Epetra_Vector(View, temp_f_vec.
trilinos_vector(), 0);
        Epetra_Vector f_epetra_vec = Epetra_Vector(View, f_vec.
trilinos_vector(), 0);
        rhs = &FVec;

        int err = 0;
        err = rhs->PutScalar(0.0);
        if(err != 0)
            throw;

        temp_f_vec = 0.;
        residual_vec = 0.;
        //std::cout << f_vec.size() << '\t' << locally_owned_dofs.size() << 
'\t' << x.MyLength() << '\n';
        f_epetra_vec = x;
        //Fill rhs
        //        Vector<double> f_vec(FVec.MyLength()), 
residual_vec(FVec.MyLength()), temp_f_vec(FVec.MyLength());
        //        for(size_t i = 0; i < f_vec.size(); ++i)
        //            f_vec[i] = x[i];
        residual_function(temp_f_vec, f_vec, residual_vec);
        FVec = Epetra_Vector(Copy, residual_vec.trilinos_vector(), 0);

        return true;
    }
private:
    std::function<void(const TrilinosWrappers::MPI::Vector&, const 
TrilinosWrappers::MPI::Vector&, TrilinosWrappers::MPI::Vector&)> 
residual_function;
    std::function<void(Epetra_CrsMatrix&)> jacobian_function;
    Epetra_Vector *rhs;

    MPI_Comm mpi_communicator;
    IndexSet locally_owned_dofs, locally_relevant_dofs;

    TrilinosWrappers::MPI::Vector f_vec, temp_f_vec, residual_vec;
};

The function "residual_function" is designed as
template <int dim>
void Step5<dim>::calculate_residual(const TrilinosWrappers::MPI::Vector &u, 
TrilinosWrappers::MPI::Vector &residual)
{
    QGauss<dim> quadrature_formula(fe.degree + 1);

    FEValues<dim> fe_values(fe,
                            quadrature_formula,
                            update_values | update_gradients |
                            update_quadrature_points | update_JxW_values);

    const unsigned int dofs_per_cell = fe.dofs_per_cell;
    const unsigned int n_q_points    = quadrature_formula.size();

    residual.reinit(locally_owned_dofs, locally_relevant_dofs, 
mpi_communicator);

    std::vector<Tensor<1, dim>> grad_u(quadrature_formula.size());
    std::vector<double> val_u(quadrature_formula.size());

    //pcout << "Create evaluation point\n";
    TrilinosWrappers::MPI::Vector local_evaluation_point = TrilinosWrappers
::MPI::Vector(u);

    Vector<double> cell_rhs(dofs_per_cell);
    std::vector<types::global_dof_index> local_dof_indices(dofs_per_cell);
    //pcout << "Looping over cells\n";
    for (const auto &cell : dof_handler.active_cell_iterators())
    {
        if(cell->is_locally_owned())
        {
            cell_rhs    = 0.;

            fe_values.reinit(cell);

            fe_values.get_function_values(local_evaluation_point, val_u);
            fe_values.get_function_gradients(local_evaluation_point, grad_u
);

            for (unsigned int q = 0; q < n_q_points; ++q)
            {
                for (unsigned int i = 0; i < dofs_per_cell; ++i)
                {
                    assemble_rhs(cell_rhs(i),
                                 -1,
                                 val_u[q],
                                 grad_u[q],
                                 fe_values.quadrature_point(q),
                                 fe_values.shape_value(i, q),
                                 fe_values.shape_grad(i, q),
                                 fe_values.JxW(q));
                }
            }

            cell->get_dof_indices(local_dof_indices);
            hanging_node_constraints.distribute_local_to_global(cell_rhs,
                                                                
local_dof_indices,
                                                                residual);
        }
    }
    residual.compress(VectorOperation::add);
}
with 
template <int dim>
void Step5<dim>::assemble_rhs(double &return_value,
                              const double factor,
                              const double &val_u,
                              const Tensor<1, dim> &grad_u,
                              const Point<dim> p,
                              const double &fe_values_value,
                              const Tensor<1, dim> &fe_gradient_value,
                              const double JxW_value)
{
    (void) val_u;
    return_value += factor
            * (coefficient(p)
               * fe_values_value
               + grad_u
               * fe_gradient_value
               )
            * JxW_value;
}

When running everything on one thread, it works fine. Running it on several 
threads leads to a crash when distributing the cell_rhs-values to the 
residual values, due to the different sizes (Trilinos-vectors are shrinked 
down, corresponding to the number of threads, while the deal.II vectors 
still operate with their full length). Is there a way do solve the problem 
without having to create two residual-functions?

-- 
The deal.II project is located at http://www.dealii.org/
For mailing list/forum options, see 
https://groups.google.com/d/forum/dealii?hl=en
--- 
You received this message because you are subscribed to the Google Groups 
"deal.II User Group" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to dealii+unsubscr...@googlegroups.com.
For more options, visit https://groups.google.com/d/optout.

/* ---------------------------------------------------------------------
 *
 * Copyright (C) 1999 - 2018 by the deal.II authors
 *
 * This file is part of the deal.II library.
 *
 * The deal.II library is free software; you can use it, redistribute
 * it, and/or modify it under the terms of the GNU Lesser General
 * Public License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * The full text of the license can be found in the file LICENSE.md at
 * the top level directory of deal.II.
 *
 * ---------------------------------------------------------------------

 *
 * Author: Wolfgang Bangerth, University of Heidelberg, 1999
 */


// @sect3{Include files}

//Nox include files
#include <NOX.H>
#include <NOX_LAPACK_Group.H>
#include <Teuchos_StandardCatchMacros.hpp>

#include <deal.II/base/quadrature_lib.h>
#include <deal.II/base/function.h>
#include <deal.II/base/logstream.h>
#include <deal.II/base/convergence_table.h>
#include <deal.II/base/utilities.h>
#include <deal.II/base/conditional_ostream.h>
#include <deal.II/base/index_set.h>
#include <deal.II/base/timer.h>

#include <deal.II/lac/vector.h>
#include <deal.II/lac/full_matrix.h>
#include <deal.II/lac/sparse_matrix.h>
#include <deal.II/lac/sparse_direct.h>
#include <deal.II/lac/dynamic_sparsity_pattern.h>
#include <deal.II/lac/solver_cg.h>
#include <deal.II/lac/precondition.h>
#include <deal.II/lac/solver_gmres.h>
#include <deal.II/lac/trilinos_vector.h>
#include <deal.II/lac/sparsity_tools.h>
#include <deal.II/lac/generic_linear_algebra.h>

#include <deal.II/grid/tria.h>

#include <deal.II/grid/grid_generator.h>
#include <deal.II/grid/tria_accessor.h>
#include <deal.II/grid/tria_iterator.h>

#include <deal.II/dofs/dof_accessor.h>
#include <deal.II/dofs/dof_tools.h>
#include <deal.II/dofs/dof_handler.h>

#include <deal.II/fe/fe_q.h>
#include <deal.II/fe/fe_values.h>

#include <deal.II/numerics/vector_tools.h>
#include <deal.II/numerics/matrix_tools.h>
#include <deal.II/numerics/data_out.h>

#include <deal.II/distributed/tria.h>
#include <deal.II/distributed/grid_refinement.h>

// NOX Objects
#include "NOX.H"
#include "NOX_Epetra.H"

// Trilinos Objects
#ifdef HAVE_MPI
#include "Epetra_MpiComm.h"
#else
#include "Epetra_SerialComm.h"
#endif
#include "Epetra_Vector.h"
#include "Epetra_Operator.h"
#include "Epetra_RowMatrix.h"
#include "NOX_Epetra_Interface_Required.H" // base class
#include "NOX_Epetra_Interface_Jacobian.H" // base class
#include "NOX_Epetra_Interface_Preconditioner.H" // base class
#include "Epetra_CrsMatrix.h"
#include "Epetra_Map.h"
#include "Epetra_LinearProblem.h"
#include "AztecOO.h"
#include "Teuchos_StandardCatchMacros.hpp"

// User's application specific files
//#include "problem_interface.h" // Interface file to NOX
//#include "finite_element_problem.h"

// Required for reading and writing parameter lists from xml format
// Configure Trilinos with --enable-teuchos-extended
#ifdef HAVE_TEUCHOS_EXTENDED
#include "Teuchos_XMLParameterListHelpers.hpp"
#include <sstream>
#endif

// This is C++ ...
#include <fstream>
#include <iostream>

using namespace dealii;

template <int dim>
class Solution : public Function<dim>
{
public:
    Solution() : Function<dim>(1)
    {

    }

    virtual double value(const Point<dim> &p, const unsigned int component) const override;
    virtual Tensor<1, dim> gradient(const Point<dim> &p, const unsigned int component) const override;

private:
};

template <int dim>
double Solution<dim>::value(const Point<dim> &p, const unsigned int) const
{
    const double x = p[0];
    const double y = p[1];
    return 10 * (y - x * x) + 1 - x;
}

template<int dim>
Tensor<1, dim> Solution<dim>::gradient(const Point<dim> &p, const unsigned int) const
{
    Tensor<1, dim> return_value;
    AssertThrow(dim == 2, ExcNotImplemented());

    const double x = p[0];
    const double y = p[1];
    return_value[0] = M_PI * cos(M_PI * x) * sin(M_PI * y);
    return_value[1] = M_PI * cos(M_PI * y) * sin(M_PI * x);
    return return_value;
}

template <int dim>
class ProblemInterface : public NOX::Epetra::Interface::Required,
        public NOX::Epetra::Interface::Preconditioner,
        public NOX::Epetra::Interface::Jacobian
{
public:
    ProblemInterface(std::function<void(const TrilinosWrappers::MPI::Vector&, const TrilinosWrappers::MPI::Vector&, TrilinosWrappers::MPI::Vector&)> residual_function,
                     std::function<void(Epetra_CrsMatrix &)> jacobian_function,
                     const MPI_Comm &mpi_communicator,
                     const IndexSet &locally_owned_dofs,
                     const IndexSet &locally_relevant_dofs) :
        residual_function(residual_function),
        jacobian_function(jacobian_function),
        mpi_communicator(mpi_communicator),
        locally_owned_dofs(locally_owned_dofs),
        locally_relevant_dofs(locally_relevant_dofs)
    {
        residual_vec.reinit(locally_owned_dofs, locally_relevant_dofs, mpi_communicator);
        temp_f_vec.reinit(locally_owned_dofs, locally_relevant_dofs, mpi_communicator);
        f_vec.reinit(locally_owned_dofs, locally_relevant_dofs, mpi_communicator);

    };

    ~ProblemInterface(){

    };

    bool computeF(const Epetra_Vector &x, Epetra_Vector &FVec, NOX::Epetra::Interface::Required::FillType)
    {

        Epetra_Vector temp_f_epetra_vec = Epetra_Vector(View, temp_f_vec.trilinos_vector(), 0);
        Epetra_Vector f_epetra_vec = Epetra_Vector(View, f_vec.trilinos_vector(), 0);
        rhs = &FVec;

        int err = 0;
        err = rhs->PutScalar(0.0);
        if(err != 0)
            throw;

        temp_f_vec = 0.;
        residual_vec = 0.;
        //std::cout << f_vec.size() << '\t' << locally_owned_dofs.size() << '\t' << x.MyLength() << '\n';
        f_epetra_vec = x;
        //Fill rhs
        //        Vector<double> f_vec(FVec.MyLength()), residual_vec(FVec.MyLength()), temp_f_vec(FVec.MyLength());
        //        for(size_t i = 0; i < f_vec.size(); ++i)
        //            f_vec[i] = x[i];
        residual_function(temp_f_vec, f_vec, residual_vec);
        FVec = Epetra_Vector(Copy, residual_vec.trilinos_vector(), 0);

        return true;
    }

    bool computeJacobian(const Epetra_Vector &x, Epetra_Operator &Jac)
    {
        Epetra_CrsMatrix* Jacobian = dynamic_cast<Epetra_CrsMatrix*>(&Jac);
        if (Jacobian == NULL) {
            std::cout << "ERROR: Problem_Interface::computeJacobian() - The supplied"
                      << "Epetra_Operator is NOT an Epetra_RowMatrix!" << std::endl;
            throw;
        }
        jacobian_function(*Jacobian);
        return false;
    }

    bool computePrecMatrix(const Epetra_Vector &x, Epetra_Operator &M)
    {
        Epetra_RowMatrix* precMatrix = dynamic_cast<Epetra_RowMatrix*>(&M);
        if (precMatrix == NULL) {
            std::cout << "ERROR: Problem_Interface::computePreconditioner() - The supplied"
                      << "Epetra_Operator is NOT an Epetra_RowMatrix!" << std::endl;
            throw;
        }
        //return problem.evaluate(MATRIX_ONLY, &x, NULL, precMatrix);
        return false;
    }

    bool computePreconditioner(const Epetra_Vector& x, Epetra_Operator& M,
                               Teuchos::ParameterList* precParams = 0)
    {
        std::cout << "ERROR: Problem_Interface::preconditionVector() - Use Explicit Jaciban only for this test problem!" << std::endl;
        throw 1;
        return false;
    }

private:
    std::function<void(const TrilinosWrappers::MPI::Vector&, const TrilinosWrappers::MPI::Vector&, TrilinosWrappers::MPI::Vector&)> residual_function;
    std::function<void(Epetra_CrsMatrix&)> jacobian_function;
    Epetra_Vector *rhs;

    MPI_Comm mpi_communicator;
    IndexSet locally_owned_dofs, locally_relevant_dofs;

    TrilinosWrappers::MPI::Vector f_vec, temp_f_vec, residual_vec;
};

// @sect3{The <code>Step5</code> class template}

// The main class is mostly as in the previous example. The most visible
// change is that the function <code>make_grid_and_dofs</code> has been
// removed, since creating the grid is now done in the <code>run</code>
// function and the rest of its functionality is now in
// <code>setup_system</code>. Apart from this, everything is as before.
template <int dim>
class Step5
{
public:
    Step5();
    ~Step5();
    void run();

private:
    void setup_system();
    void assemble_system();
    void calculate_residual_multi_val(const TrilinosWrappers::MPI::Vector &epsilon_v, const TrilinosWrappers::MPI::Vector &u, TrilinosWrappers::MPI::Vector &residual);
    void calculate_residual(const TrilinosWrappers::MPI::Vector &u, TrilinosWrappers::MPI::Vector &residual);
    void calculate_jacobian(Epetra_CrsMatrix &jacobian);
    void solve();
    void solve_with_NOX();
    void solve_with_matrix_jacobian();
    void solve_with_math_jacobian();
    void process_solution();
    void output_results(const unsigned int cycle);

    void assemble_rhs(double &return_value,
                      const double factor,
                      const double &val_u,
                      const Tensor<1, dim> &grad_u,
                      const Point<dim> p,
                      const double &fe_values_value,
                      const Tensor<1, dim> &fe_gradient_value,
                      const double JxW_value);

    //    void assemble_jacobian(double &return_value,
    //                           const double factor,
    //                           const double &val_u,
    //                           const Tensor<1, dim> &grad_u,
    //                           const Point<dim> p,
    //                           const double &fe_values_value,
    //                           const Tensor<1, dim> &fe_gradient_value,
    //                           const double JxW_value);

    MPI_Comm mpi_communicator;

    parallel::distributed::Triangulation<dim> triangulation;
    DoFHandler<dim>    dof_handler;
    FE_Q<dim>          fe;

    IndexSet locally_owned_dofs;
    IndexSet locally_relevant_dofs;

    SparsityPattern      sparsity_pattern;
    TrilinosWrappers::SparseMatrix system_matrix;
    TrilinosWrappers::MPI::Vector locally_relevant_solution;
    TrilinosWrappers::MPI::Vector locally_relevant_update;
    TrilinosWrappers::MPI::Vector system_rhs;

    TrilinosWrappers::MPI::Vector evaluation_point;

    AffineConstraints<double> hanging_node_constraints;

    ConvergenceTable convergence_table;

    ConditionalOStream pcout;
    TimerOutput computing_timer;
};


// @sect3{Working with nonconstant coefficients}

// In step-4, we showed how to use non-constant boundary values and right hand
// side.  In this example, we want to use a variable coefficient in the
// elliptic operator instead. Since we have a function which just depends on
// the point in space we can do things a bit more simply and use a plain
// function instead of inheriting from Function.

// This is the implementation of the coefficient function for a single
// point. We let it return 20 if the distance to the origin is less than 0.5,
// and 1 otherwise.
template <int dim>
double coefficient(const Point<dim> &p)
{
    return -2 * M_PI * M_PI * sin(M_PI * p[0]) * sin(M_PI * p[1]);
}

// @sect3{The <code>Step5</code> class implementation}

// @sect4{Step5::Step5}

// This function is as before.
template <int dim>
Step5<dim>::Step5()
    : mpi_communicator(MPI_COMM_WORLD),
      triangulation(mpi_communicator,
                    typename Triangulation<dim>::MeshSmoothing(Triangulation<dim>::smoothing_on_refinement | Triangulation<dim>::smoothing_on_coarsening)),
      dof_handler(triangulation),
      fe(2),
      pcout(std::cout, (Utilities::MPI::this_mpi_process(mpi_communicator) == 0)),
      computing_timer(mpi_communicator, pcout, TimerOutput::summary, TimerOutput::wall_times)
{}

template <int dim>
Step5<dim>::~Step5<dim>()
{
    dof_handler.clear();
}


// @sect4{Step5::setup_system}

// This is the function <code>make_grid_and_dofs</code> from the previous
// example, minus the generation of the grid. Everything else is unchanged:
template <int dim>
void Step5<dim>::setup_system()
{
    TimerOutput::Scope t(computing_timer, "Setup system");
    dof_handler.distribute_dofs(fe);

    locally_owned_dofs = dof_handler.locally_owned_dofs();
    DoFTools::extract_locally_relevant_dofs(dof_handler, locally_relevant_dofs);

    locally_relevant_solution.reinit(locally_owned_dofs, locally_relevant_dofs, mpi_communicator);
    locally_relevant_update.reinit(locally_owned_dofs, locally_relevant_dofs, mpi_communicator);
    evaluation_point.reinit(locally_owned_dofs, locally_relevant_dofs, mpi_communicator);

    system_rhs.reinit(locally_owned_dofs, mpi_communicator);

    hanging_node_constraints.clear();
    hanging_node_constraints.reinit(locally_relevant_dofs);
    DoFTools::make_zero_boundary_constraints(dof_handler,
                                             0,
                                             hanging_node_constraints);
    hanging_node_constraints.close();

    pcout << "   Number of degrees of freedom: " << dof_handler.n_dofs()
          << std::endl;

    DynamicSparsityPattern dsp(locally_relevant_dofs);
    DoFTools::make_sparsity_pattern(dof_handler, dsp, hanging_node_constraints, false);
    SparsityTools::distribute_sparsity_pattern(dsp,
                                               dof_handler.n_locally_owned_dofs_per_processor(),
                                               mpi_communicator,
                                               locally_relevant_dofs);
    system_matrix.reinit(locally_owned_dofs,
                         locally_owned_dofs,
                         dsp,
                         mpi_communicator);
}

template <int dim>
void Step5<dim>::assemble_rhs(double &return_value,
                              const double factor,
                              const double &val_u,
                              const Tensor<1, dim> &grad_u,
                              const Point<dim> p,
                              const double &fe_values_value,
                              const Tensor<1, dim> &fe_gradient_value,
                              const double JxW_value)
{
    (void) val_u;
    return_value += factor
            * (coefficient(p)
               * fe_values_value
               + grad_u
               * fe_gradient_value
               )
            * JxW_value;
}


// @sect4{Step5::assemble_system}

// As in the previous examples, this function is not changed much with regard
// to its functionality, but there are still some optimizations which we will
// show. For this, it is important to note that if efficient solvers are used
// (such as the preconditioned CG method), assembling the matrix and right hand
// side can take a comparable time, and you should think about using one or
// two optimizations at some places.
//
// The first parts of the function are completely unchanged from before:
template <int dim>
void Step5<dim>::assemble_system()
{
    TimerOutput::Scope t(computing_timer, "Assemble system");
    QGauss<dim> quadrature_formula(fe.degree + 1);
    FEValues<dim> fe_values(fe,
                            quadrature_formula,
                            update_values | update_gradients |
                            update_quadrature_points | update_JxW_values);
    const unsigned int dofs_per_cell = fe.dofs_per_cell;
    const unsigned int n_q_points    = quadrature_formula.size();
    FullMatrix<double> cell_matrix(dofs_per_cell, dofs_per_cell);
    Vector<double>     cell_rhs(dofs_per_cell);
    std::vector<types::global_dof_index> local_dof_indices(dofs_per_cell);

    std::vector<Tensor<1, dim>> grad_u(quadrature_formula.size());
    std::vector<double> val_u(quadrature_formula.size());

    for (const auto &cell : dof_handler.active_cell_iterators())
    {
        if(cell->is_locally_owned())
        {
            cell_matrix = 0.;
            cell_rhs    = 0.;
            fe_values.reinit(cell);

            fe_values.get_function_values(locally_relevant_solution, val_u);
            fe_values.get_function_gradients(locally_relevant_solution, grad_u);

            for (unsigned int q = 0; q < n_q_points; ++q)
            {
                //            const double current_coefficient =
                //                    coefficient<dim>(fe_values.quadrature_point(q_index));
                for (unsigned int i = 0; i < dofs_per_cell; ++i)
                {
                    for (unsigned int j = 0; j < dofs_per_cell; ++j)
                        cell_matrix(i, j) +=
                                (1
                                 * fe_values.shape_grad(i, q)
                                 * fe_values.shape_grad(j, q)
                                 * fe_values.JxW(q));
                    //                cell_rhs(i) -= (coefficient<dim>(fe_values.quadrature_point(q))
                    //                                * fe_values.shape_value(i, q)
                    //                                - grad_u[q]
                    //                                * fe_values.shape_grad(i, q)
                    //                                )
                    //                        * fe_values.JxW(q);
                    //                    cell_matrix(i, j) -= fe_values.shape_grad(i, q)
                    //                            * fe_values.shape_grad(j, q)
                    //                            * fe_values.JxW(q);
                    //                cell_rhs(i) += coefficient<dim>(fe_values.quadrature_point(q))
                    //                        * fe_values.shape_value(i, q)
                    //                        * fe_values.JxW(q);
                    assemble_rhs(cell_rhs(i),
                                 -1,
                                 val_u[q],
                                 grad_u[q],
                                 fe_values.quadrature_point(q),
                                 fe_values.shape_value(i, q),
                                 fe_values.shape_grad(i, q),
                                 fe_values.JxW(q));
                }
            }
            cell->get_dof_indices(local_dof_indices);
            hanging_node_constraints.distribute_local_to_global(cell_matrix,
                                                                cell_rhs,
                                                                local_dof_indices,
                                                                system_matrix,
                                                                system_rhs);
        }
    }
    system_matrix.compress(VectorOperation::add);
    system_rhs.compress(VectorOperation::add);
}

template <int dim>
void Step5<dim>::calculate_jacobian(Epetra_CrsMatrix &jacobian)
{
    assemble_system();
    jacobian = system_matrix.trilinos_matrix();
}

template <int dim>
void Step5<dim>::calculate_residual_multi_val(const TrilinosWrappers::MPI::Vector &epsilon_v, const TrilinosWrappers::MPI::Vector &u, TrilinosWrappers::MPI::Vector &residual)
{
    TrilinosWrappers::MPI::Vector u_new(u);
    u_new += epsilon_v;
    calculate_residual(u_new, residual);
}


template <int dim>
void Step5<dim>::calculate_residual(const TrilinosWrappers::MPI::Vector &u, TrilinosWrappers::MPI::Vector &residual)
{
    QGauss<dim> quadrature_formula(fe.degree + 1);

    FEValues<dim> fe_values(fe,
                            quadrature_formula,
                            update_values | update_gradients |
                            update_quadrature_points | update_JxW_values);

    const unsigned int dofs_per_cell = fe.dofs_per_cell;
    const unsigned int n_q_points    = quadrature_formula.size();

    residual.reinit(locally_owned_dofs, locally_relevant_dofs, mpi_communicator);

    std::vector<Tensor<1, dim>> grad_u(quadrature_formula.size());
    std::vector<double> val_u(quadrature_formula.size());

    //pcout << "Create evaluation point\n";
    TrilinosWrappers::MPI::Vector local_evaluation_point = TrilinosWrappers::MPI::Vector(u);

    Vector<double> cell_rhs(dofs_per_cell);
    std::vector<types::global_dof_index> local_dof_indices(dofs_per_cell);
    //pcout << "Looping over cells\n";
    for (const auto &cell : dof_handler.active_cell_iterators())
    {
        if(cell->is_locally_owned())
        {
            cell_rhs    = 0.;

            fe_values.reinit(cell);

            fe_values.get_function_values(local_evaluation_point, val_u);
            fe_values.get_function_gradients(local_evaluation_point, grad_u);

            for (unsigned int q = 0; q < n_q_points; ++q)
            {
                for (unsigned int i = 0; i < dofs_per_cell; ++i)
                {
                    assemble_rhs(cell_rhs(i),
                                 -1,
                                 val_u[q],
                                 grad_u[q],
                                 fe_values.quadrature_point(q),
                                 fe_values.shape_value(i, q),
                                 fe_values.shape_grad(i, q),
                                 fe_values.JxW(q));
                }
            }

            cell->get_dof_indices(local_dof_indices);
            hanging_node_constraints.distribute_local_to_global(cell_rhs,
                                                                local_dof_indices,
                                                                residual);
        }
    }
    residual.compress(VectorOperation::add);
}

template <int dim>
void Step5<dim>::process_solution(void)
{
    TimerOutput::Scope t(computing_timer, "Process system");
    Vector<double> difference_per_cell(triangulation.n_active_cells());

    VectorTools::integrate_difference(dof_handler,
                                      locally_relevant_solution,
                                      Solution<dim>(),
                                      difference_per_cell,
                                      QGauss<dim>(fe.degree * 2 + 1),
                                      VectorTools::L2_norm);
    const double L2_error = VectorTools::compute_global_error(triangulation, difference_per_cell, VectorTools::L2_norm);

    VectorTools::integrate_difference(dof_handler,
                                      locally_relevant_solution,
                                      Solution<dim>(),
                                      difference_per_cell,
                                      QGauss<dim>(fe.degree * 2 + 1),
                                      VectorTools::H1_seminorm);
    const double H1_error = VectorTools::compute_global_error(triangulation, difference_per_cell, VectorTools::H1_seminorm);

    const QTrapez<1>     q_trapez;
    const QIterated<dim> q_iterated(q_trapez, fe.degree * 2 + 1);
    VectorTools::integrate_difference(dof_handler,
                                      locally_relevant_solution,
                                      Solution<dim>(),
                                      difference_per_cell,
                                      q_iterated,
                                      VectorTools::Linfty_norm);
    const double Linfty_error = VectorTools::compute_global_error(triangulation, difference_per_cell, VectorTools::Linfty_norm);

    //const unsigned int n_active_cells = triangulation.n_active_cells();
    const unsigned int n_global_active_cells = triangulation.n_global_active_cells();
    const unsigned int n_dofs         = dof_handler.n_dofs();
    //    pcout << "Cycle " << cycle << ':' << std::endl
    //          << "   Number of active cells:          " << n_active_cells
    //          << std::endl
    //          << "   Number of globally active cells: " << n_global_active_cells
    //          << std::endl
    //          << "   Number of degrees of freedom:    " << n_dofs << std::endl;

    convergence_table.add_value("cells", n_global_active_cells);
    convergence_table.add_value("dofs", n_dofs);
    convergence_table.add_value("L2", L2_error);
    convergence_table.add_value("H1", H1_error);
    convergence_table.add_value("Linfty", Linfty_error);
    convergence_table.add_value("RHS-L2", system_rhs.l2_norm());
}

// @sect4{Step5::solve}

// The solution process again looks mostly like in the previous
// examples. However, we will now use a preconditioned conjugate gradient
// algorithm. It is not very difficult to make this change. In fact, the only
// thing we have to alter is that we need an object which will act as a
// preconditioner. We will use SSOR (symmetric successive overrelaxation),
// with a relaxation factor of 1.2. For this purpose, the
// <code>SparseMatrix</code> class has a function which does one SSOR step,
// and we need to package the address of this function together with the
// matrix on which it should act (which is the matrix to be inverted) and the
// relaxation factor into one object. The <code>PreconditionSSOR</code> class
// does this for us. (<code>PreconditionSSOR</code> class takes a template
// argument denoting the matrix type it is supposed to work on. The default
// value is <code>SparseMatrix@<double@></code>, which is exactly what we need
// here, so we simply stick with the default and do not specify anything in
// the angle brackets.)
//
// Note that for the present case, SSOR doesn't really perform much better
// than most other preconditioners (though better than no preconditioning at
// all). A brief comparison of different preconditioners is presented in the
// Results section of the next tutorial program, step-6.
//
// With this, the rest of the function is trivial: instead of the
// <code>PreconditionIdentity</code> object we have created before, we now use
// the preconditioner we have declared, and the CG solver will do the rest for
// us:
template <int dim>
void Step5<dim>::solve()
{
    //    SolverControl solver_control(1000, 1e-12);
    //    SolverCG<>    solver(solver_control);

    //    PreconditionSSOR<> preconditioner;
    //    preconditioner.initialize(system_matrix, 1.2);

    //    solver.solve(system_matrix, solution, system_rhs, preconditioner);

    //    std::cout << "   " << solver_control.last_step()
    //              << " CG iterations needed to obtain convergence." << std::endl;
}

template <int dim>
void Step5<dim>::solve_with_NOX()
{
    TimerOutput::Scope t(computing_timer, "Solve NOX system");
    //TrilinosWrappers::MPI::Vector local_problem;
    //local_problem.reinit(dof_handler.locally_owned_dofs());
    pcout << locally_relevant_solution.size() << '\n';

    Epetra_Vector local_solution(Copy, locally_relevant_solution.trilinos_vector(), 0);

    //NOX::Epetra::Vector noxSoln(local_solution, NOX::Epetra::Vector::CreateView);

    Teuchos::RCP<Teuchos::ParameterList> nlParamsPtr =
            Teuchos::rcp(new Teuchos::ParameterList);
    Teuchos::ParameterList& nlParams = *nlParamsPtr.get();

    // Set the nonlinear solver method
    nlParams.set("Nonlinear Solver", "Line Search Based");
    //nlParams.set("Nonlinear Solver", "Trust Region Based");

    // Set the printing parameters in the "Printing" sublist
    Teuchos::ParameterList& printParams = nlParams.sublist("Printing");
    printParams.set("Output Precision", 3);
    printParams.set("Output Processor", 0);
    printParams.set("Output Information",
                    //                        NOX::Utils::OuterIteration +
                    //                        NOX::Utils::OuterIterationStatusTest +
                    //                        NOX::Utils::InnerIteration +
                    //                        NOX::Utils::Parameters +
                    //                        NOX::Utils::Details +
                    NOX::Utils::Warning);

    // Create printing utilities
    NOX::Utils utils(printParams);

    // Sublist for line search
    Teuchos::ParameterList& searchParams = nlParams.sublist("Line Search");
    searchParams.set("Method", "Full Step");
    //searchParams.set("Method", "Interval Halving");
    //searchParams.set("Method", "Polynomial");
    //searchParams.set("Method", "NonlinearCG");
    //searchParams.set("Method", "Quadratic");
    //searchParams.set("Method", "More'-Thuente");

    // Sublist for direction
    Teuchos::ParameterList& dirParams = nlParams.sublist("Direction");
    //  dirParams.set("Method", "Modified-Newton");
    //  Teuchos::ParameterList& newtonParams = dirParams.sublist("Modified-Newton");
    //    newtonParams.set("Max Age of Jacobian", 2);
    dirParams.set("Method", "Newton");
    Teuchos::ParameterList& newtonParams = dirParams.sublist("Newton");
    newtonParams.set("Forcing Term Method", "Constant");
    //newtonParams.set("Forcing Term Method", "Type 1");
    //newtonParams.set("Forcing Term Method", "Type 2");
    //newtonParams.set("Forcing Term Minimum Tolerance", 1.0e-4);
    //newtonParams.set("Forcing Term Maximum Tolerance", 0.1);
    //dirParams.set("Method", "Steepest Descent");
    //Teuchos::ParameterList& sdParams = dirParams.sublist("Steepest Descent");
    //sdParams.set("Scaling Type", "None");
    //sdParams.set("Scaling Type", "2-Norm");
    //sdParams.set("Scaling Type", "Quadratic Model Min");
    //dirParams.set("Method", "NonlinearCG");
    //Teuchos::ParameterList& nlcgParams = dirParams.sublist("Nonlinear CG");
    //nlcgParams.set("Restart Frequency", 2000);
    //nlcgParams.set("Precondition", "On");
    //nlcgParams.set("Orthogonalize", "Polak-Ribiere");
    //nlcgParams.set("Orthogonalize", "Fletcher-Reeves");

    // Sublist for linear solver for the Newton method
    Teuchos::ParameterList& lsParams = newtonParams.sublist("Linear Solver");
    lsParams.set("Aztec Solver", "GMRES");
    lsParams.set("Max Iterations", 800);
    lsParams.set("Tolerance", 1e-4);
    lsParams.set("Preconditioner", "None");
    //lsParams.set("Preconditioner", "Ifpack");
    lsParams.set("Max Age Of Prec", 5);

    Teuchos::RCP<ProblemInterface<dim>> interface = Teuchos::rcp(new ProblemInterface<dim>(std::bind(&Step5::calculate_residual_multi_val,
                                                                                                     this,
                                                                                                     std::placeholders::_1,
                                                                                                     std::placeholders::_2,
                                                                                                     std::placeholders::_3),
                                                                                           std::bind(&Step5::calculate_jacobian,
                                                                                                     this,
                                                                                                     std::placeholders::_1),
                                                                                           mpi_communicator,
                                                                                           locally_owned_dofs,
                                                                                           locally_relevant_dofs));

    Teuchos::RCP<NOX::Epetra::MatrixFree> MF =
            Teuchos::rcp(new NOX::Epetra::MatrixFree(printParams, interface, local_solution));
    Teuchos::RCP<NOX::Epetra::FiniteDifference> FD =
            Teuchos::rcp(new NOX::Epetra::FiniteDifference(printParams, interface,
                                                           local_solution));

    // Create the linear system
    Teuchos::RCP<NOX::Epetra::Interface::Required> iReq = interface;
    Teuchos::RCP<NOX::Epetra::Interface::Jacobian> iJac = MF;
    Teuchos::RCP<NOX::Epetra::Interface::Preconditioner> iPrec =
            FD;
    Teuchos::RCP<NOX::Epetra::LinearSystemAztecOO> linSys =
            Teuchos::rcp(new NOX::Epetra::LinearSystemAztecOO(printParams, lsParams,
                                                              iReq, iJac, MF,// iPrec, FD,
                                                              local_solution));

    // Create the Group
    Teuchos::RCP<NOX::Epetra::Group> grp =
            Teuchos::rcp(new NOX::Epetra::Group(printParams, iReq, local_solution,
                                                linSys));

    // Create the convergence tests
    Teuchos::RCP<NOX::StatusTest::NormF> absresid =
            Teuchos::rcp(new NOX::StatusTest::NormF(1.0e-8));
    Teuchos::RCP<NOX::StatusTest::NormF> relresid =
            Teuchos::rcp(new NOX::StatusTest::NormF(*grp.get(), 1.0e-2));
    Teuchos::RCP<NOX::StatusTest::NormUpdate> update =
            Teuchos::rcp(new NOX::StatusTest::NormUpdate(1.0e-5));
    Teuchos::RCP<NOX::StatusTest::NormWRMS> wrms =
            Teuchos::rcp(new NOX::StatusTest::NormWRMS(1.0e-2, 1.0e-8));
    Teuchos::RCP<NOX::StatusTest::Combo> converged =
            Teuchos::rcp(new NOX::StatusTest::Combo(NOX::StatusTest::Combo::AND));
    converged->addStatusTest(absresid);
    converged->addStatusTest(relresid);
    converged->addStatusTest(wrms);
    converged->addStatusTest(update);
    Teuchos::RCP<NOX::StatusTest::MaxIters> maxiters =
            Teuchos::rcp(new NOX::StatusTest::MaxIters(20));
    Teuchos::RCP<NOX::StatusTest::FiniteValue> fv =
            Teuchos::rcp(new NOX::StatusTest::FiniteValue);
    Teuchos::RCP<NOX::StatusTest::Combo> combo =
            Teuchos::rcp(new NOX::StatusTest::Combo(NOX::StatusTest::Combo::OR));
    combo->addStatusTest(fv);
    combo->addStatusTest(converged);
    combo->addStatusTest(maxiters);

    //    #ifdef HAVE_TEUCHOS_EXTENDED
    //        std::stringstream input_file_name;
    //        input_file_name << "input_" << 0 << ".xml";

    //        // Write the parameter list to a file
    //        std::cout << "Writing parameter list to "<< input_file_name.str() << std::endl;
    //        Teuchos::writeParameterListToXmlFile(*nlParamsPtr, input_file_name.str());

    //        // Read in the parameter list from a file
    //        std::cout << "Reading parameter list from " << input_file_name.str() << std::endl;
    //        Teuchos::RCP<Teuchos::ParameterList> finalParamsPtr =
    //                Teuchos::rcp(new Teuchos::ParameterList);
    //        Teuchos::updateParametersFromXmlFile(input_file_name.str(),
    //                                             outArg(*finalParamsPtr));
    //    #else
    Teuchos::RCP<Teuchos::ParameterList> finalParamsPtr = nlParamsPtr;
    //    #endif

    // Create the method
    Teuchos::RCP<NOX::Solver::Generic> solver =
            NOX::Solver::buildSolver(grp, combo, finalParamsPtr);
    NOX::StatusTest::StatusType status;
    {
        TimerOutput::Scope t(computing_timer, "NOX solver");
        status = solver->solve();
    }

    if (status == NOX::StatusTest::Converged)
        utils.out() << "Test Passed!" << std::endl;
    else {
        utils.out() << "Nonlinear solver failed to converge!" << std::endl;
    }

    // Get the Epetra_Vector with the final solution from the solver
    const NOX::Epetra::Group& finalGroup = dynamic_cast<const NOX::Epetra::Group&>(solver->getSolutionGroup());
    const Epetra_Vector& finalSolution = (dynamic_cast<const NOX::Epetra::Vector&>(finalGroup.getX())).getEpetraVector();

    // End Nonlinear Solver **************************************

    // Output the parameter list
    if (utils.isPrintType(NOX::Utils::Parameters)) {
        utils.out() << std::endl << "Final Parameters" << std::endl
                    << "****************" << std::endl;
        solver->getList().print(utils.out());
        utils.out() << std::endl;
    }

    Epetra_Vector epetra_F(View, locally_relevant_solution.trilinos_vector(), 0);
    epetra_F = finalSolution;
    //        solution.reinit(finalSolution.MyLength());
    //        for(size_t i = 0; i < solution.size(); ++i)
    //            solution[i] = finalSolution[i];
}

template <int dim>
void Step5<dim>::solve_with_matrix_jacobian()
{
    TimerOutput::Scope t(computing_timer, "Solve system");
    SolverControl solver_control(dof_handler.n_dofs(), 1e-12);
    SolverGMRES<TrilinosWrappers::MPI::Vector> solver(solver_control);
    TrilinosWrappers::MPI::Vector completely_distributed_solution(locally_owned_dofs, mpi_communicator);

#ifdef USE_JACOBIAN
    PreconditionIdentity preconditioner;
    preconditioner.initialize(system_matrix);
#else
    TrilinosWrappers::PreconditionSSOR preconditioner;
    TrilinosWrappers::PreconditionSSOR::AdditionalData data;

    preconditioner.initialize(system_matrix, data);
#endif

    //        Vector<double> local_residual(dof_handler.n_dofs()), local_solution(dof_handler.n_dofs()), newton_update(dof_handler.n_dofs());

#ifdef USE_JACOBIAN
    jacobian_approximation jacobian(std::bind(&Step5<dim>::calculate_residual_multi_val,
                                              this,
                                              std::placeholders::_1,
                                              std::placeholders::_2,
                                              std::placeholders::_3),
                                    dof_handler.n_dofs(),
                                    solution);
#endif
    //    calculate_residual_multi_val(local_solution, solution, local_residual);
    //    std::cout << "L2-norm before resetting: " << local_residual.l2_norm() << '\n';

    //    solution = 0;

    //    calculate_residual_multi_val(local_solution, solution, local_residual);
    //    std::cout << "L2-norm before solving: " << local_residual.l2_norm() << '\n';

#ifdef USE_JACOBIAN
    solver.solve(jacobian, newton_update, system_rhs, preconditioner);
#else
    solver.solve(system_matrix, completely_distributed_solution, system_rhs, preconditioner);
#endif
    //    SparseDirectUMFPACK inv;
    //    inv.initialize(system_matrix);

    //    inv.vmult(solution, system_rhs);
    //    hanging_node_constraints.distribute(solution);
    hanging_node_constraints.distribute(completely_distributed_solution);

    locally_relevant_solution = completely_distributed_solution;

    //    calculate_residual_multi_val(local_solution, solution, local_residual);
    //    std::cout << "L2-norm after solving: " << local_residual.l2_norm() << '\n';

    //solution.sadd(1, 1, system_update);
}

template <int dim>
void Step5<dim>::solve_with_math_jacobian()
{
    //    Vector<double> residual(dof_handler.n_dofs()),
    //            residual_in_future(dof_handler.n_dofs()),
    //            g(dof_handler.n_dofs()),
    //            v(dof_handler.n_dofs()),
    //            v_new(dof_handler.n_dofs()),
    //            v_eps(dof_handler.n_dofs()),
    //            r(dof_handler.n_dofs()),
    //            r_new(dof_handler.n_dofs()),
    //            temp_solution(dof_handler.n_dofs());
    //    Vector<double> evaluation_point(dof_handler.n_dofs());

    //    temp_solution = 0;
    //    const double epsilon = 1e-6;
    //    double alpha = 0;
    //    double beta = 0;

    //    //calculate_residual(solution, residual);

    //    r = residual;
    //    v = residual;

    //    for(size_t i = 0; i < dof_handler.n_dofs(); ++i)
    //    {
    //        v_eps = v;
    //        v_eps *= epsilon;

    //        calculate_residual_multi_val(v_eps, solution, g);

    //        g -= v;
    //        g /= epsilon;

    //        alpha = 1/(g * v);
    //        std::cout << "alpha I: " << solution << '\n';
    //        alpha *= (residual * residual);
    //        std::cout << "alpha: " << alpha << '\n';

    //        temp_solution.sadd(1, alpha, v);
    //        r_new = r;
    //        r_new.sadd(1, -alpha, g);
    //        if(r.l2_norm() < 1e-8)
    //            break;
    //        beta = (r_new * r_new) / (r * r);
    //        v_new = r_new;
    //        v_new.sadd(1, beta, v);
    //    }
    //    solution += temp_solution;
}


// @sect4{Step5::output_results and setting output flags}

// Writing output to a file is mostly the same as for the previous example,
// but here we will show how to modify some output options and how to
// construct a different filename for each refinement cycle.
template <int dim>
void Step5<dim>::output_results(const unsigned int cycle)
{
    TimerOutput::Scope t(computing_timer, "Print system");
    DataOut<dim> data_out;

    data_out.attach_dof_handler(dof_handler);

    data_out.add_data_vector(locally_relevant_solution, "Calculated_solution");

    Vector<float> subdomain(triangulation.n_active_cells());
    for (size_t i = 0; i < subdomain.size(); ++i)
        subdomain(i) = triangulation.locally_owned_subdomain();
    data_out.add_data_vector(subdomain, "subdomain");

    data_out.build_patches();

    std::ofstream output("solution-" + Utilities::int_to_string(cycle, 2) +
                         "." + Utilities::int_to_string(triangulation.locally_owned_subdomain(), 4) + ".vtu");
    data_out.write_vtu(output);

    if (Utilities::MPI::this_mpi_process(mpi_communicator) == 0)
    {
        std::vector<std::string> filenames;
        for (unsigned int i = 0;
             i < Utilities::MPI::n_mpi_processes(mpi_communicator);
             ++i)
            filenames.push_back("solution-" + Utilities::int_to_string(cycle, 2) +
                                "." + Utilities::int_to_string(i, 4) + ".vtu");
        std::ofstream master_output(
                    "solution-" + Utilities::int_to_string(cycle, 2) + ".pvtu");
        data_out.write_pvtu_record(master_output, filenames);
    }

    convergence_table.set_precision("L2", 3);
    convergence_table.set_precision("H1", 3);
    convergence_table.set_precision("Linfty", 3);
    convergence_table.set_precision("RHS-L2", 3);
    convergence_table.set_scientific("L2", true);
    convergence_table.set_scientific("H1", true);
    convergence_table.set_scientific("Linfty", true);
    convergence_table.set_scientific("RHS-L2", true);

    convergence_table.set_tex_caption("cells", "\\# cells");
    convergence_table.set_tex_caption("dofs", "\\# dofs");
    convergence_table.set_tex_caption("L2", "$L^2$-error");
    convergence_table.set_tex_caption("H1", "$H^1$-error");
    convergence_table.set_tex_caption("Linfty", "$L^\\infty$-error");
    convergence_table.set_tex_caption("RHS-L2", "$L^2$-norm of RHS");

    convergence_table.add_column_to_supercolumn("cells", "n cells");
    std::vector<std::string> new_order;
    new_order.emplace_back("n cells");
    new_order.emplace_back("H1");
    new_order.emplace_back("L2");
    new_order.emplace_back("Linfty");
    convergence_table.set_column_order(new_order);
    convergence_table.evaluate_convergence_rates(
                "L2", ConvergenceTable::reduction_rate);
    convergence_table.evaluate_convergence_rates(
                "L2", ConvergenceTable::reduction_rate_log2);
    convergence_table.evaluate_convergence_rates(
                "H1", ConvergenceTable::reduction_rate);
    convergence_table.evaluate_convergence_rates(
                "H1", ConvergenceTable::reduction_rate_log2);

    if(Utilities::MPI::n_mpi_processes(mpi_communicator) == 0)
    {
        std::cout << "\n";

        convergence_table.write_text(std::cout);
        std::cout << "\n\n";
        convergence_table.clear();
    }

}



// @sect4{Step5::run}

// The second to last thing in this program is the definition of the
// <code>run()</code> function. In contrast to the previous programs, we will
// compute on a sequence of meshes that after each iteration is globally
// refined. The function therefore consists of a loop over 6 cycles. In each
// cycle, we first print the cycle number, and then have to decide what to do
// with the mesh. If this is not the first cycle, we simply refine the
// existing mesh once globally. Before running through these cycles, however,
// we have to generate a mesh:

// In previous examples, we have already used some of the functions from the
// <code>GridGenerator</code> class. Here we would like to read a grid from a
// file where the cells are stored and which may originate from someone else,
// or may be the product of a mesh generator tool.
//
// In order to read a grid from a file, we generate an object of data type
// GridIn and associate the triangulation to it (i.e. we tell it to fill our
// triangulation object when we ask it to read the file). Then we open the
// respective file and initialize the triangulation with the data in the file:
template <int dim>
void Step5<dim>::run()
{
    GridGenerator::hyper_cube(triangulation);

    for (unsigned int cycle = 0; cycle < 6; ++cycle)
    {
        pcout << "Cycle " << cycle << ':' << std::endl;

        if (cycle != 0)
            triangulation.refine_global(1);

        // Now that we have a mesh for sure, we write some output and do all the
        // things that we have already seen in the previous examples.
        pcout << "   Number of active cells: "  //
              << triangulation.n_active_cells() //
              << std::endl                      //
              << "   Total number of cells: "   //
              << triangulation.n_cells()        //
              << std::endl;

        setup_system();
        assemble_system();
        process_solution();
        solve_with_matrix_jacobian();
        process_solution();
        output_results(cycle);
        solve_with_NOX();
        output_results(100 + cycle);
        computing_timer.print_summary();
        computing_timer.reset();
    }
    //triangulation.refine_global(1);
    //setup_system();
    //assemble_system();
}


// @sect3{The <code>main</code> function}

// The main function looks mostly like the one in the previous example, so we
// won't comment on it further:
int main(int argc, char *argv[])
{
    Utilities::MPI::MPI_InitFinalize mpi_initialization(argc, argv, numbers::invalid_unsigned_int);
    Step5<2> laplace_problem_2d;
    laplace_problem_2d.run();
    return 0;
}

[deal.II] Integrating a deal.II-specific function in a NOX-Interface for MPI-threads

Reply via email to