Hi everyone,

I am working on incompressible flow problems and stumbled upon an issue when calling PETScWrappers::SparseMatrix::mmult(). Before I describe the problem in more detail, let me comment on the basic building blocks of the MWE:
(i) a parallel::distributed::Triangulation and either the PETSc or the Trilinos linear algebra package;
(ii) a dim-dimensional vector-valued FE space for the velocity components and a scalar-valued FE space for the pressure, simply constructed via

  FESystem<dim> fe (FE_Q<dim>(vel_degree), dim,
                    FE_Q<dim>(press_degree), 1);

After integrating the weak form -- or just filling the matrices with some entries -- we end up with a block system

  A u + B p = f
  C u + D p = g.

To construct some preconditioners, we have to perform matrix-matrix products, either for the Schur complement

  (a) S = D - C inv(diag(A)) B

or for some A_gamma

  (b) A_gamma = A + gamma * B inv(diag(Mp)) C.

Completely ignoring for now whether that is necessary or not (I know there is the possibility of assembling a grad-div term and using a Laplacian on the pressure space to account for the reaction term, but that is not really an option in the stabilized case), we need those matrix products, and here comes the problem: with either PETSc or Trilinos I get identical matrix products when calling mmult(), BUT when using PETSc, the RAM is slowly but steadily filled (up to 500 GB on our local cluster).

I came up with the MWE attached, which does nothing else than initialize the system and then construct the matrix product 1000 times in a row. Am I doing anything wrong, or is this supposed to be used differently? I am using deal.II v9.0.1 installed via candi, so maybe the old version is the reason. Any suggestions? Any help would be greatly appreciated!

Bonus question: Is there a similar way to hand the sparsity pattern of the product over to the mmult() function? (For the dealii::SparseMatrix there is, which is why I am asking.)

Kind regards,
Richard
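PS: To make (a), (b) and the bonus question concrete, this is roughly how the two products are computed in the attached MWE with the parallel wrappers (inv_diag_A and inv_diag_D hold the inverted diagonals of the A and D blocks; in the MWE, D stands in for Mp and gamma = 1):

  // (b) A_gamma = A + gamma * B inv(diag(Mp)) C
  system_matrix.block(0,1).mmult (B_inv_diag_D_C,
                                  system_matrix.block(1,0),
                                  inv_diag_D);
  B_inv_diag_D_C.add (+1.0, system_matrix.block(0,0));

  // (a) S = D - C inv(diag(A)) B
  inv_diag_A *= -1.0;
  system_matrix.block(1,0).mmult (D_minus_C_inv_diag_A_B,
                                  system_matrix.block(0,1),
                                  inv_diag_A);
  D_minus_C_inv_diag_A_B.add (+1.0, system_matrix.block(1,1));

Regarding the bonus question: if I read the documentation correctly, the serial dealii::SparseMatrix::mmult(C, B, V, rebuild_sparsity_pattern) lets me keep the sparsity pattern of C fixed after the first call by passing rebuild_sparsity_pattern = false; I am looking for something equivalent for the PETSc/Trilinos wrappers.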
##
#  CMake script for blockLDU-fsi_(...) program:
##

# Set the name of the project and target:
SET(TARGET "mmult_mwe")

# Declare all source files the target consists of:
SET(TARGET_SRC
  ${TARGET}.cc
  # You can specify additional files here!
  )

# Usually, you will not need to modify anything beyond this point...

CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)

FIND_PACKAGE(deal.II 9.0.1 QUIET
  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
  )
IF(NOT ${deal.II_FOUND})
  MESSAGE(FATAL_ERROR "\n"
    "*** Could not locate deal.II. ***\n\n"
    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
    )
ENDIF()

DEAL_II_INITIALIZE_CACHED_VARIABLES()
PROJECT(${TARGET})
DEAL_II_INVOKE_AUTOPILOT()
/*
  This code is licensed under the "GNU GPL version 2 or later". See
  license.txt or https://www.gnu.org/licenses/gpl-2.0.html
  Copyright 2019: Richard Schussnig
*/

// Include files
#include <deal.II/base/quadrature_lib.h>
#include <deal.II/base/logstream.h>
#include <deal.II/base/function.h>
#include <deal.II/base/utilities.h>
#include <deal.II/base/timer.h>
#include <deal.II/base/conditional_ostream.h>
#include <deal.II/base/index_set.h>

#include <deal.II/lac/vector.h>
#include <deal.II/lac/block_vector.h>
#include <deal.II/lac/full_matrix.h>
#include <deal.II/lac/block_sparse_matrix.h>
#include <deal.II/lac/sparse_direct.h>
#include <deal.II/lac/generic_linear_algebra.h>
#include <deal.II/lac/petsc_parallel_sparse_matrix.h>
#include <deal.II/lac/petsc_parallel_vector.h>
#include <deal.II/lac/petsc_solver.h>
#include <deal.II/lac/petsc_precondition.h>
#include <deal.II/lac/trilinos_solver.h>
#include <deal.II/lac/sparsity_tools.h>
#include <deal.II/lac/constraint_matrix.h>
#include <deal.II/lac/solver_cg.h>
#include <deal.II/lac/solver_gmres.h>
#include <deal.II/lac/solver_bicgstab.h>
#include <deal.II/lac/solver_minres.h>
#include <deal.II/lac/precondition.h>
#include <deal.II/lac/block_matrix_array.h>

#include <deal.II/grid/tria.h>
#include <deal.II/grid/grid_generator.h>
#include <deal.II/grid/tria_accessor.h>
#include <deal.II/grid/tria_iterator.h>
#include <deal.II/grid/tria_boundary_lib.h>
#include <deal.II/grid/grid_tools.h>
#include <deal.II/grid/grid_in.h>
#include <deal.II/grid/grid_out.h>
#include <deal.II/grid/manifold_lib.h>

#include <deal.II/dofs/dof_handler.h>
#include <deal.II/dofs/dof_renumbering.h>
#include <deal.II/dofs/dof_accessor.h>
#include <deal.II/dofs/dof_tools.h>

#include <deal.II/fe/fe_q.h>
#include <deal.II/fe/fe_dgq.h>
#include <deal.II/fe/fe_dgp.h>
#include <deal.II/fe/fe_system.h>
#include <deal.II/fe/fe_values.h>
#include <deal.II/fe/mapping_q1.h>

#include <deal.II/numerics/vector_tools.h>
#include <deal.II/numerics/matrix_tools.h>
#include <deal.II/numerics/error_estimator.h>
#include <deal.II/numerics/data_out.h>
// #include <deal.II/numerics/solution_transfer.h>

#include <deal.II/distributed/solution_transfer.h>
#include <deal.II/distributed/tria.h>
#include <deal.II/distributed/grid_refinement.h>

// (Block-)LinearOperator and operations among those for the sub-solvers.
#include <deal.II/lac/packaged_operation.h>
#include <deal.II/lac/linear_operator.h>
#include <deal.II/lac/block_linear_operator.h>

// Parameter handler for input-file processing at runtime.
#include <deal.II/base/parameter_handler.h>

// Linear algebra packages - switch between PETSc & Trilinos.
//#define FORCE_USE_OF_TRILINOS // ###
namespace LA
{
#if defined(DEAL_II_WITH_PETSC) && !(defined(DEAL_II_WITH_TRILINOS) && defined(FORCE_USE_OF_TRILINOS))
  using namespace dealii::LinearAlgebraPETSc;
#define USE_PETSC_LA
#elif defined(DEAL_II_WITH_TRILINOS)
  using namespace dealii::LinearAlgebraTrilinos;
#else
#error DEAL_II_WITH_PETSC or DEAL_II_WITH_TRILINOS required
#endif
}

// C++
#include <fstream>
#include <iostream>
#include <sstream>

// deal.II
using namespace dealii;

// Define flow problem.
template <int dim>
class flow_problem
{
public:
  flow_problem (const FESystem<dim> &fe);
  ~flow_problem ();
  void run ();

private:
  // Create system matrix, rhs and distribute degrees of freedom.
  void setup_system ();

  void inv_diag_matrix (LA::MPI::Vector &inv_diag_matrix,
                        const unsigned int partitioning_block,
                        const LA::MPI::SparseMatrix &matrix,
                        bool &inversion_ok);

  void parallel_print_matrix (LA::MPI::SparseMatrix &matrix,
                              const std::string file_dest);

  // Function to scale the rows of a matrix with a given vector.
  void scale_block_rows_by_vec (const LA::MPI::SparseMatrix &matrix_in,
                                LA::MPI::SparseMatrix &matrix_out,
                                const LA::MPI::Vector &diag_vec);

  const FESystem<dim> &fe;

  MPI_Comm mpi_communicator;

  parallel::distributed::Triangulation<dim> triangulation;
  DoFHandler<dim> dof_handler;

  ConditionalOStream pcout;

  // Distributed vectors.
  LA::MPI::BlockSparseMatrix system_matrix;
  LA::MPI::SparseMatrix B_inv_diag_D_C, D_minus_C_inv_diag_A_B;

  ConstraintMatrix constraints;

  std::vector<IndexSet> own_partitioning, rel_partitioning;
};

// We are going to use the finite element discretization:
// Q_(k+1)^c for solid and fluid displacements and velocities
// and Q_k^dc for the pressure.
template <int dim>
flow_problem<dim>::flow_problem (const FESystem<dim> &fe)
  :
  fe (fe),
  mpi_communicator (MPI_COMM_WORLD),
  triangulation (mpi_communicator,
                 typename Triangulation<dim>::MeshSmoothing
                 (Triangulation<dim>::smoothing_on_refinement |
                  Triangulation<dim>::smoothing_on_coarsening)),
  dof_handler (triangulation),
  pcout (std::cout, (Utilities::MPI::this_mpi_process(mpi_communicator) == 0))
{}

// Destructor.
template <int dim>
flow_problem<dim>::~flow_problem ()
{}

// System setup.
template <int dim>
void flow_problem<dim>::setup_system ()
{
  // Get some mesh.
  GridGenerator::hyper_cube (triangulation, -0.5, 0.5, false);
  triangulation.refine_global (4);

  // Clear existing matrices.
  system_matrix.clear ();
  B_inv_diag_D_C.clear ();
  D_minus_C_inv_diag_A_B.clear ();

  // Distribute DoFs.
  dof_handler.distribute_dofs (fe);

  // Cuthill_McKee renumbering.
  DoFRenumbering::Cuthill_McKee (dof_handler);

  // Component numbers.
  std::vector<unsigned int> block_component (dim+1, 0); // velocity
  block_component[dim] = 1;                             // pressure

  // Count DoFs per block.
  std::vector<unsigned int> dofs_per_block (2);
  DoFTools::count_dofs_per_block (dof_handler, dofs_per_block, block_component);
  const unsigned int n_v = dofs_per_block[0],
                     n_p = dofs_per_block[1];

  // Count cells per processor.
  unsigned int n_own_active_cells = triangulation.n_locally_owned_active_cells();
  n_own_active_cells = Utilities::MPI::sum(n_own_active_cells, mpi_communicator);

  // Print cell and dof counts.
  pcout << "Cells: " << n_own_active_cells << " , "
        << "DoFs: " << dof_handler.n_dofs ()
        << " ( n_v = " << n_v << " , " << "n_p = " << n_p << " ) "
        << std::endl;

  // Renumber by block components.
  DoFRenumbering::component_wise (dof_handler, block_component);

  // We split up the IndexSet for locally owned and locally relevant DoFs
  // into IndexSets based on how we want to create the block matrices
  // and vectors.
  IndexSet own_dofs, rel_dofs;
  own_dofs = dof_handler.locally_owned_dofs();
  DoFTools::extract_locally_relevant_dofs (dof_handler, rel_dofs);

  // Block system; velocities and pressure in separate blocks.
  own_partitioning.resize(2);
  own_partitioning[0] = own_dofs.get_view(0  , n_v);
  own_partitioning[1] = own_dofs.get_view(n_v, n_v + n_p);

  rel_partitioning.resize(2);
  rel_partitioning[0] = rel_dofs.get_view(0  , n_v);
  rel_partitioning[1] = rel_dofs.get_view(n_v, n_v + n_p);

  // Get zero Dirichlet constraints on vector space.
  {
    constraints.clear();
    constraints.reinit (rel_dofs);
    DoFTools::make_hanging_node_constraints (dof_handler, constraints);

    std::vector<bool> component_mask (dim+1, true);
    component_mask[dim] = false;

    unsigned int dirichlet_boundary_id = 0;
    VectorTools::interpolate_boundary_values (dof_handler,
                                              dirichlet_boundary_id,
                                              ZeroFunction<dim>(dim+1),
                                              constraints,
                                              component_mask);
    constraints.close ();
  }

  // Reinit system_matrix.
  const unsigned int n_blocks = 2;
  {
    BlockDynamicSparsityPattern bdsp (n_blocks, n_blocks);

    std::vector<unsigned int> n_dof_per_block(n_blocks);
    n_dof_per_block[0] = n_v;
    n_dof_per_block[1] = n_p;

    for (unsigned int i=0; i<n_blocks; i++)
      {
        for (unsigned int j=0; j<n_blocks; j++)
          {
            bdsp.block(i,j).reinit(n_dof_per_block[i], n_dof_per_block[j]);
          }
      }
    bdsp.compress();
    bdsp.collect_sizes();

    DoFTools::make_sparsity_pattern (dof_handler, bdsp, constraints,
                                     true /*keep constraints for mmult*/,
                                     Utilities::MPI::this_mpi_process(mpi_communicator));

    SparsityTools::distribute_sparsity_pattern (bdsp,
                                                dof_handler.locally_owned_dofs_per_processor(),
                                                mpi_communicator,
                                                rel_dofs);

//    // Print out the sparsity pattern
//    if (Utilities::MPI::n_mpi_processes() == 1)
//      {
//        sparsity_pattern.copy_from (bdsp);
//        std::ofstream out ("sparsity_pattern_blockwise.gpl");
//        sparsity_pattern.print_gnuplot(out);
//        // in terminal : gnuplot [ENTER] plot "sparsity_pattern.gpl"
//      }

    // Init matrix with the sparsity pattern.
    system_matrix.reinit (own_partitioning, bdsp, mpi_communicator);

    // All the commented stuff below was not doing the job well enough. ###

//    // Get sparsity pattern to store A_gamma.
//    {
//      DynamicSparsityPattern B_C (bdsp.block(0,1).n_rows(), bdsp.block(1,0).n_cols());
//
//      // Add entries from A.
//      if (true)
//        {
//          DynamicSparsityPattern::iterator it_A  = bdsp.block(0,0).begin(),
//                                           end_A = bdsp.block(0,0).end();
//          for (; it_A != end_A; ++it_A)
//            {
//              B_C.add (it_A->row(), it_A->column());
//            }
//        }
//      else
//        {
//          #ifdef USE_PETSC_LA
//            // missing.
//          #else
//            std::vector<unsigned int> idx_vec;
//            own_partitioning[0].fill_index_vector(idx_vec);
//
//            for (unsigned int idx=0; idx<idx_vec.size(); idx++)
//              {
//                const unsigned int row = idx_vec[idx];
//
//                // Loop over entries in that row.
//                LA::MPI::SparseMatrix::iterator it_row  = system_matrix.block(0,0).begin(row),
//                                                end_row = system_matrix.block(0,0).end(row);
//                for (; it_row != end_row; ++it_row)
//                  {
//                    B_C.add (row, it_row->column());
//                  }
//              }
//          #endif
//        }
//
//      // Compute one matrix product and add entries to B_C.
//      system_matrix.block(0,1).mmult(B_inv_diag_D_C, system_matrix.block(1,0));
//
//      #ifdef USE_PETSC_LA
//        // missing.
//      #else
//        {
//          std::vector<unsigned int> idx_vec;
//          own_partitioning[0].fill_index_vector(idx_vec);
//
//          for (unsigned int idx=0; idx<idx_vec.size(); idx++)
//            {
//              const unsigned int row = idx_vec[idx];
//
//              // Loop over entries in that row.
//              LA::MPI::SparseMatrix::iterator it_row  = B_inv_diag_D_C.begin(row),
//                                              end_row = B_inv_diag_D_C.end(row);
//              for (; it_row != end_row; ++it_row)
//                {
//                  B_C.add (row, it_row->column());
//                }
//            }
//        }
//      #endif
//
//      system_matrix.block(0,0).reinit(own_partitioning[0], own_partitioning[0], B_C, mpi_communicator);
//      B_inv_diag_D_C.reinit(system_matrix.block(0,0));
//    }
//
//    // Get sparsity pattern to compute the pressure Schur complement.
//    {
//      // compute_mmult_sparsity_pattern().
//      DynamicSparsityPattern C_B (bdsp.block(1,0).n_rows(), bdsp.block(0,1).n_cols());
//      {
//        DynamicSparsityPattern::iterator it_left  = bdsp.block(1,0).begin(),
//                                         end_left = bdsp.block(1,0).end();
//        for (; it_left != end_left; ++it_left)
//          {
//            const unsigned int j = it_left->column();
//
//            DynamicSparsityPattern::iterator it_right  = bdsp.block(0,1).begin(j),
//                                             end_right = bdsp.block(0,1).end(j);
//            for (; it_right != end_right; ++it_right)
//              {
//                C_B.add (it_left->row(), it_right->column());
//              }
//          }
//      }
//
//      // Loop over owned rows, and add entries from D.
//      {
//        DynamicSparsityPattern::iterator it_D  = bdsp.block(1,1).begin(),
//                                         end_D = bdsp.block(1,1).end();
//        for (; it_D != end_D; ++it_D)
//          {
//            C_B.add (it_D->row(), it_D->column());
//          }
//      }
//
//      // Reinit D-block and matrix product.
//      system_matrix.block(1,1).reinit (own_partitioning[1], own_partitioning[1], C_B, mpi_communicator);
//      D_minus_C_inv_diag_A_B.reinit (system_matrix.block(1,1));
//    }
  }
}

template <int dim>
void flow_problem<dim>::inv_diag_matrix (LA::MPI::Vector &inv_diag_matrix,
                                         const unsigned int partitioning_block,
                                         const LA::MPI::SparseMatrix &matrix,
                                         bool &inversion_ok)
{
  // Function to return the inverse of the diagonal approximation of a matrix.
  inv_diag_matrix.reinit(own_partitioning[partitioning_block], mpi_communicator);
  inv_diag_matrix = 0;
  inversion_ok = true;

  std::vector<unsigned int> index_vec;
  own_partitioning[partitioning_block].fill_index_vector(index_vec);

  double val;
  for (unsigned int i=0; i<index_vec.size(); i++)
    {
      val = matrix.diag_element(index_vec[i]);
      if (std::abs(val) < 1e-16 || std::isnan(val))
        {
          inv_diag_matrix[index_vec[i]] = 0.0;
          inversion_ok = false;
        }
      else
        {
          inv_diag_matrix[index_vec[i]] = 1.0/val;
        }
    }
  inv_diag_matrix.compress (VectorOperation::insert);

  // Warning output.
  if (!inversion_ok)
    {
      pcout << "Encountered some zero on diag(matrix), ignored the entry." << std::endl;
    }
}

template <int dim>
void flow_problem<dim>::parallel_print_matrix (LA::MPI::SparseMatrix &matrix,
                                               const std::string file_dest)
{
  // Print contributions to the global matrix in a file.
  for (unsigned int i=0; i<=Utilities::MPI::n_mpi_processes(mpi_communicator); i++)
    if (Utilities::MPI::this_mpi_process(mpi_communicator) == i)
      {
        std::string str_name = "_proc" + Utilities::int_to_string(i,4) + ".txt";
        std::ofstream mat_file(file_dest + str_name);
        matrix.print(mat_file, false);
      }
}

// Run.
template <int dim>
void flow_problem<dim>::run ()
{
  // Setup matrices.
  setup_system ();

  // Outer loop to repeatedly perform the mmult().
  const unsigned int max_k = 1000;
  for (unsigned int k=0; k<max_k; k++)
    {
      pcout << "k = " << k << " / " << max_k << std::endl;

      // Reset matrix.
      system_matrix = 0;

      // Fill matrices with dummy values.
      const unsigned int dofs_per_cell = fe.dofs_per_cell;
      std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);

      typename DoFHandler<dim>::active_cell_iterator
        cell = dof_handler.begin_active(),
        endc = dof_handler.end();
      for (; cell!=endc; ++cell)
        {
          if (cell->is_locally_owned())
            {
              cell->get_dof_indices (local_dof_indices);
              for (unsigned int i = 0; i < dofs_per_cell; ++i)
                {
                  for (unsigned int j=0; j<dofs_per_cell; ++j)
                    {
                      system_matrix.add (local_dof_indices[i], local_dof_indices[j], 1.0);
                    }
                }
            }
        }

      // Compress matrices.
      system_matrix.compress(VectorOperation::add);

      // Get the diagonals of A and D blocks.
      LA::MPI::Vector inv_diag_A, inv_diag_D;
      bool inv_ok;
      flow_problem<dim>::inv_diag_matrix(inv_diag_A, 0, system_matrix.block(0,0), inv_ok);
      flow_problem<dim>::inv_diag_matrix(inv_diag_D, 1, system_matrix.block(1,1), inv_ok);

      {
        // Get A = A + gamma B inv(diag(D)) C.
        B_inv_diag_D_C = 0;
        system_matrix.block(0,1).mmult (B_inv_diag_D_C,
                                        system_matrix.block(1,0),
                                        inv_diag_D);
        B_inv_diag_D_C.add (+1.0, system_matrix.block(0,0));

//        // Print that matrix.
//        #ifdef USE_PETSC_LA
//          parallel_print_matrix(B_inv_diag_D_C, "./B_inv_diag_D_C_via_petsc");
//        #else
//          parallel_print_matrix(B_inv_diag_D_C, "./B_inv_diag_D_C_via_trilinos");
//        #endif

        if (k==0)
          {
            // ###
            system_matrix.block(0,0).reinit(B_inv_diag_D_C);
          }
        system_matrix.block(0,0).add (+1.0, B_inv_diag_D_C);
      }

      {
        // Get S = D - C inv(diag(A)) B in this scope.
        D_minus_C_inv_diag_A_B = 0;
        inv_diag_A *= -1.0;
        {
          system_matrix.block(1,0).mmult (D_minus_C_inv_diag_A_B,
                                          system_matrix.block(0,1),
                                          inv_diag_A);
          D_minus_C_inv_diag_A_B.add (+1.0, system_matrix.block(1,1));
        }
      }
    }
}

// Main function.
int main (int argc, char *argv[])
{
  try
    {
      // NOTE: SWITCH BETWEEN PETSC & TRILINOS at the FORCE_USE_OF_TRILINOS define (marked by ###).
      Utilities::MPI::MPI_InitFinalize mpi_initialization(argc, argv, 1);
      deallog.depth_console (0);

      const unsigned int dim = 3, vel_degree = 1, press_degree = 1;
      FESystem<dim> fe (FE_Q<dim>(vel_degree), dim,
                        FE_Q<dim>(press_degree), 1);

      flow_problem<dim> flow_problem(fe);
      flow_problem.run ();
    }
  catch (std::exception &exc)
    {
      std::cerr << std::endl << std::endl
                << "----------------------------------------------------"
                << std::endl;
      std::cerr << "Exception on processing: " << std::endl
                << exc.what() << std::endl
                << "Aborting!" << std::endl
                << "----------------------------------------------------"
                << std::endl;
      return 1;
    }
  catch (...)
    {
      std::cerr << std::endl << std::endl
                << "----------------------------------------------------"
                << std::endl;
      std::cerr << "Unknown exception!" << std::endl
                << "Aborting!" << std::endl
                << "----------------------------------------------------"
                << std::endl;
      return 1;
    }

  return 0;
}
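PPS: In case it helps the discussion, here is a rough and untested sketch of the workaround I had in mind, namely bypassing the wrapper's mmult() and calling PETSc's MatMatMult() directly with MAT_REUSE_MATRIX, so that the product matrix is allocated only once. The function name is made up, I am simply relying on the implicit conversion of PETScWrappers matrices to the raw Mat handle, and error codes are ignored for brevity:

  #ifdef USE_PETSC_LA
  #include <petscmat.h>

  // Sketch: on the first call the product and its sparsity pattern are
  // allocated by PETSc (MAT_INITIAL_MATRIX); afterwards the same Mat object
  // is reused, which requires that the nonzero pattern of left*right does
  // not change between iterations.
  void mmult_with_reuse (const LA::MPI::SparseMatrix &left,
                         const LA::MPI::SparseMatrix &right,
                         Mat                         &product,
                         const bool                   first_call)
  {
    if (first_call)
      MatMatMult (left, right, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &product);
    else
      MatMatMult (left, right, MAT_REUSE_MATRIX, PETSC_DEFAULT, &product);
    // The caller has to call MatDestroy(&product) once at the very end.
  }
  #endif

Is that the intended way to reuse the product matrix, or does the wrapper already handle this and I am just missing something?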