I see now: the TaoSetup_PDIPM() code explicitly builds the large KKT matrix by calling MatGetRow() on the smaller matrices, with no matrix abstractions at all, and it assumes global AIJ storage for the large matrix.
I think this can be made more general by building a MatNest() from the smaller matrices, which allows each of the smaller matrices to have whatever unique structure is appropriate for it. Of course, if needed, for example for a direct solver, the MatNest can convert itself to a global AIJ matrix and recover the current result. As Pierre rightly points out, MPIAIJ is just not a good format for many constrained optimization problems. This has been seriously neglected in PETSc/TAO. I don't think any single format is ideal for such problems; likely hybrid formats are needed, and this gets super tricky when one needs to use direct solvers such as Cholesky. If MUMPS has non-row-oriented storage formats we could leverage that, but I don't know if it does.

  Barry

  /* 1st Row block of KKT matrix: [Hess, grad Ce', grad Ci', 0] */
  ierr = MatGetOwnershipRange(tao->hessian,&rjstart,NULL);CHKERRQ(ierr);
  for (i=0; i<pdipm->nx; i++){
    row = rstart + i;

    ierr = MatGetRow(tao->hessian,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
    proc = 0;
    for (j=0; j < nc; j++) {
      while (aj[j] >= cranges[proc+1]) proc++;
      col  = aj[j] - cranges[proc] + Jranges[proc];
      ierr = MatPreallocateSet(row,1,&col,dnz,onz);CHKERRQ(ierr);
    }
    ierr = MatRestoreRow(tao->hessian,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);

    if (pdipm->ng) {
      /* Insert grad g' */
      ierr = MatGetRow(jac_equality_trans,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
      ierr = MatGetOwnershipRanges(tao->jacobian_equality,&ranges);CHKERRQ(ierr);
      proc = 0;
      for (j=0; j < nc; j++) {
        /* find row ownership of aj[j] */
        while (aj[j] >= ranges[proc+1]) proc++;
        nx_all = rranges[proc+1] - rranges[proc];
        col    = aj[j] - ranges[proc] + Jranges[proc] + nx_all;
        ierr   = MatPreallocateSet(row,1,&col,dnz,onz);CHKERRQ(ierr);
      }
      ierr = MatRestoreRow(jac_equality_trans,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
    }

    /* Insert Jce_xfixed^T' */
    if (pdipm->nxfixed) {
      ierr = MatGetRow(Jce_xfixed_trans,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
      ierr = MatGetOwnershipRanges(pdipm->Jce_xfixed,&ranges);CHKERRQ(ierr);
      proc = 0;
      for (j=0; j < nc; j++) {
        /* find row ownership of aj[j] */
        while (aj[j] >= ranges[proc+1]) proc++;
        nx_all = rranges[proc+1] - rranges[proc];
        col    = aj[j] - ranges[proc] + Jranges[proc] + nx_all + ng_all[proc];
        ierr   = MatPreallocateSet(row,1,&col,dnz,onz);CHKERRQ(ierr);
      }
      ierr = MatRestoreRow(Jce_xfixed_trans,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
    }

    if (pdipm->nh) {
      /* Insert -grad h' */
      ierr = MatGetRow(jac_inequality_trans,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
      ierr = MatGetOwnershipRanges(tao->jacobian_inequality,&ranges);CHKERRQ(ierr);
      proc = 0;
      for (j=0; j < nc; j++) {
        /* find row ownership of aj[j] */
        while (aj[j] >= ranges[proc+1]) proc++;
        nx_all = rranges[proc+1] - rranges[proc];
        col    = aj[j] - ranges[proc] + Jranges[proc] + nx_all + nce_all[proc];
        ierr   = MatPreallocateSet(row,1,&col,dnz,onz);CHKERRQ(ierr);
      }
      ierr = MatRestoreRow(jac_inequality_trans,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
    }

    /* Insert Jci_xb^T' */
    ierr = MatGetRow(Jci_xb_trans,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
    ierr = MatGetOwnershipRanges(pdipm->Jci_xb,&ranges);CHKERRQ(ierr);
    proc = 0;
    for (j=0; j < nc; j++) {
      /* find row ownership of aj[j] */
      while (aj[j] >= ranges[proc+1]) proc++;
      nx_all = rranges[proc+1] - rranges[proc];
      col    = aj[j] - ranges[proc] + Jranges[proc] + nx_all + nce_all[proc] + nh_all[proc];
      ierr   = MatPreallocateSet(row,1,&col,dnz,onz);CHKERRQ(ierr);
    }
    ierr = MatRestoreRow(Jci_xb_trans,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
  }
  /* 2nd Row block of KKT matrix: [grad Ce, 0, 0, 0] */
  if (pdipm->Ng) {
    ierr = MatGetOwnershipRange(tao->jacobian_equality,&rjstart,NULL);CHKERRQ(ierr);
    for (i=0; i < pdipm->ng; i++){
      row = rstart + pdipm->off_lambdae + i;

      ierr = MatGetRow(tao->jacobian_equality,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
      proc = 0;
      for (j=0; j < nc; j++) {
        while (aj[j] >= cranges[proc+1]) proc++;
        col  = aj[j] - cranges[proc] + Jranges[proc];
        ierr = MatPreallocateSet(row,1,&col,dnz,onz);CHKERRQ(ierr); /* grad g */
      }
      ierr = MatRestoreRow(tao->jacobian_equality,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
    }
  }

  /* Jce_xfixed */
  if (pdipm->Nxfixed) {
    ierr = MatGetOwnershipRange(pdipm->Jce_xfixed,&Jcrstart,NULL);CHKERRQ(ierr);
    for (i=0; i < (pdipm->nce - pdipm->ng); i++){
      row = rstart + pdipm->off_lambdae + pdipm->ng + i;

      ierr = MatGetRow(pdipm->Jce_xfixed,i+Jcrstart,&nc,&cols,NULL);CHKERRQ(ierr);
      if (nc != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"nc != 1");

      proc = 0;
      j    = 0;
      while (cols[j] >= cranges[proc+1]) proc++;
      col  = cols[j] - cranges[proc] + Jranges[proc];
      ierr = MatPreallocateSet(row,1,&col,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow(pdipm->Jce_xfixed,i+Jcrstart,&nc,&cols,NULL);CHKERRQ(ierr);
    }
  }

  /* 3rd Row block of KKT matrix: [grad Ci, 0, 0, -I] */
  if (pdipm->Nh) {
    ierr = MatGetOwnershipRange(tao->jacobian_inequality,&rjstart,NULL);CHKERRQ(ierr);
    for (i=0; i < pdipm->nh; i++){
      row = rstart + pdipm->off_lambdai + i;

      ierr = MatGetRow(tao->jacobian_inequality,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
      proc = 0;
      for (j=0; j < nc; j++) {
        while (aj[j] >= cranges[proc+1]) proc++;
        col  = aj[j] - cranges[proc] + Jranges[proc];
        ierr = MatPreallocateSet(row,1,&col,dnz,onz);CHKERRQ(ierr); /* grad h */
      }
      ierr = MatRestoreRow(tao->jacobian_inequality,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
    }
    /* -I */
    for (i=0; i < pdipm->nh; i++){
      row  = rstart + pdipm->off_lambdai + i;
      col  = rstart + pdipm->off_z + i;
      ierr = MatPreallocateSet(row,1,&col,dnz,onz);CHKERRQ(ierr);
    }
  }

  /* Jci_xb */
  ierr = MatGetOwnershipRange(pdipm->Jci_xb,&Jcrstart,NULL);CHKERRQ(ierr);
  for (i=0; i < (pdipm->nci - pdipm->nh); i++){
    row = rstart + pdipm->off_lambdai + pdipm->nh + i;

    ierr = MatGetRow(pdipm->Jci_xb,i+Jcrstart,&nc,&cols,NULL);CHKERRQ(ierr);
    if (nc != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"nc != 1");
    proc = 0;
    for (j=0; j < nc; j++) {
      while (cols[j] >= cranges[proc+1]) proc++;
      col  = cols[j] - cranges[proc] + Jranges[proc];
      ierr = MatPreallocateSet(row,1,&col,dnz,onz);CHKERRQ(ierr);
    }
    ierr = MatRestoreRow(pdipm->Jci_xb,i+Jcrstart,&nc,&cols,NULL);CHKERRQ(ierr);

    /* -I */
    col  = rstart + pdipm->off_z + pdipm->nh + i;
    ierr = MatPreallocateSet(row,1,&col,dnz,onz);CHKERRQ(ierr);
  }

  /* 4th Row block of KKT matrix: Z and Ci */
  for (i=0; i < pdipm->nci; i++) {
    row      = rstart + pdipm->off_z + i;
    cols1[0] = rstart + pdipm->off_lambdai + i;
    cols1[1] = row;
    ierr     = MatPreallocateSet(row,2,cols1,dnz,onz);CHKERRQ(ierr);
  }
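  Roughly, the MatNest alternative could look something like the following sketch. The block names, the 4x4 ordering by [x; lambda_e; lambda_i; z], and the helper itself are illustrative placeholders, not the actual pdipm data structures; only MatCreateNest() and MatConvert() are relied on.

  #include <petscmat.h>

  /* Sketch only: assemble the KKT operator as a 4x4 MatNest from already-built
     sub-blocks; NULL entries are zero blocks. Each sub-block keeps whatever
     format and layout suits it. */
  static PetscErrorCode KKTCreateNest(Mat H,Mat JceT,Mat JciT,Mat Jce,Mat Jci,Mat mI,Mat Z,Mat Ci,Mat *K)
  {
    PetscErrorCode ierr;
    Mat            subs[16] = {H,   JceT,JciT,NULL,
                               Jce, NULL,NULL,NULL,
                               Jci, NULL,NULL,mI,
                               NULL,NULL,Z,   Ci};

    PetscFunctionBegin;
    ierr = MatCreateNest(PetscObjectComm((PetscObject)H),4,NULL,4,NULL,subs,K);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  Iterative solvers could use the nest directly; when a direct solver (e.g. Cholesky through MUMPS) is requested, something like

    ierr = MatConvert(Knest,MATAIJ,MAT_INITIAL_MATRIX,&Kaij);CHKERRQ(ierr);

  would flatten it back to the monolithic AIJ matrix that the current code builds by hand.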
> On Sep 15, 2020, at 4:03 PM, Abhyankar, Shrirang G <shrirang.abhyan...@pnnl.gov> wrote:
>
> There is some code in PDIPM that copies over matrix elements from user Jacobians/Hessians to the big KKT matrix. I am not sure how/if that will work with MatMPI_Column. We’ll have to try it out, and we need an example for that 😊
>
> Thanks,
> Shri
>
> From: Barry Smith <bsm...@petsc.dev>
> Date: Tuesday, September 15, 2020 at 1:21 PM
> To: Pierre Jolivet <pierre.joli...@enseeiht.fr>
> Cc: "Abhyankar, Shrirang G" <shrirang.abhyan...@pnnl.gov>, PETSc Development <petsc-dev@mcs.anl.gov>
> Subject: Re: [petsc-dev] PDIPDM questions
>
> Pierre,
>
> Based on my previous mail, I am hoping that the PDIPM algorithm itself won't need a major refactoring to be scalable; only a custom matrix type is needed to store and compute with the Hessian in a scalable way.
>
> Barry
>
>> On Sep 15, 2020, at 12:50 PM, Pierre Jolivet <pierre.joli...@enseeiht.fr> wrote:
>>
>>> On 15 Sep 2020, at 5:40 PM, Abhyankar, Shrirang G <shrirang.abhyan...@pnnl.gov> wrote:
>>>
>>> Pierre,
>>> You are right. There are a few MatMultTransposeAdd that may need conforming layouts for the equality/inequality constraint vectors and equality/inequality constraint Jacobian matrices. I need to check if that’s the case. We only have the ex1 example currently; we need to add more examples. We are currently working on making PDIPM robust and, while doing so, will work on adding another example.
>>>
>>> Very naive question, but given that I have a single constraint, how do I split a 1 x N matrix column-wise? I thought it was not possible.
>>>
>>> When setting the size of the constraint vector, you need to set the local size on one rank to 1 and on all others to zero. For the Jacobian, the local row size on that rank will be 1 and on all others zero. The column layout of the Jacobian should follow the layout of the vector x, so each rank sets the local column size of the Jacobian to the local size of x.
>>
>> That is assuming I don’t want x to follow the distribution of the Hessian, which is not my case.
>> Is there some plan to make PDIPM handle different layouts?
>> I hope I’m not the only one thinking that having a centralized Hessian when there is a single constraint is not scalable?
>>
>> Thanks,
>> Pierre
>>
>>> Shri
>>>
>>>> On 15 Sep 2020, at 2:21 AM, Abhyankar, Shrirang G <shrirang.abhyan...@pnnl.gov> wrote:
>>>>
>>>> Hello Pierre,
>>>> PDIPM works in parallel, so you can have distributed Hessian, Jacobians, constraints, variables, and gradients in any layout you want. If you are using a DM then you can have it generate the Hessian.
>>>
>>> Could you please show an example where this is the case?
>>> pdipm->x, which I’m assuming is a working vector, is used as input for both the Hessian and Jacobian functions, e.g.,
>>> https://gitlab.com/petsc/petsc/-/blob/master/src/tao/constrained/impls/ipm/pdipm.c#L369 (Hessian) +
>>> https://gitlab.com/petsc/petsc/-/blob/master/src/tao/constrained/impls/ipm/pdipm.c#L473 (Jacobian)
>>> I thus doubt that it is possible to have different layouts?
>>> In practice, I end up with the following error when I try this (2 processes, distributed Hessian with centralized Jacobian):
>>> [1]PETSC ERROR: --------------------- Error Message --------------------------------------------------------------
>>> [1]PETSC ERROR: Nonconforming object sizes
>>> [1]PETSC ERROR: Vector wrong size 14172 for scatter 0 (scatter reverse and vector to != ctx from size)
>>> [1]PETSC ERROR: #1 VecScatterBegin() line 96 in /Users/jolivet/Documents/repositories/petsc/src/vec/vscat/interface/vscatfce.c
>>> [1]PETSC ERROR: #2 MatMultTransposeAdd_MPIAIJ() line 1223 in /Users/jolivet/Documents/repositories/petsc/src/mat/impls/aij/mpi/mpiaij.c
>>> [1]PETSC ERROR: #3 MatMultTransposeAdd() line 2648 in /Users/jolivet/Documents/repositories/petsc/src/mat/interface/matrix.c
>>> [0]PETSC ERROR: Nonconforming object sizes
>>> [0]PETSC ERROR: Vector wrong size 13790 for scatter 27962 (scatter reverse and vector to != ctx from size)
>>> [1]PETSC ERROR: #4 TaoSNESFunction_PDIPM() line 510 in /Users/jolivet/Documents/repositories/petsc/src/tao/constrained/impls/ipm/pdipm.c
>>> [0]PETSC ERROR: #5 TaoSolve_PDIPM() line 712 in /Users/jolivet/Documents/repositories/petsc/src/tao/constrained/impls/ipm/pdipm.c
>>> [1]PETSC ERROR: #6 TaoSolve() line 222 in /Users/jolivet/Documents/repositories/petsc/src/tao/interface/taosolver.c
>>> [0]PETSC ERROR: #1 VecScatterBegin() line 96 in /Users/jolivet/Documents/repositories/petsc/src/vec/vscat/interface/vscatfce.c
>>> [0]PETSC ERROR: #2 MatMultTransposeAdd_MPIAIJ() line 1223 in /Users/jolivet/Documents/repositories/petsc/src/mat/impls/aij/mpi/mpiaij.c
>>> [0]PETSC ERROR: #3 MatMultTransposeAdd() line 2648 in /Users/jolivet/Documents/repositories/petsc/src/mat/interface/matrix.c
>>> [0]PETSC ERROR: #4 TaoSNESFunction_PDIPM() line 510 in /Users/jolivet/Documents/repositories/petsc/src/tao/constrained/impls/ipm/pdipm.c
>>> [0]PETSC ERROR: #5 TaoSolve_PDIPM() line 712 in /Users/jolivet/Documents/repositories/petsc/src/tao/constrained/impls/ipm/pdipm.c
>>> [0]PETSC ERROR: #6 TaoSolve() line 222 in /Users/jolivet/Documents/repositories/petsc/src/tao/interface/taosolver.c
>>>
>>> I think this can be reproduced with ex1.c by just distributing the Hessian instead of having it centralized on rank 0.
>>>
>>>> Ideally, you want to have the layout below to minimize movement of matrix/vector elements across ranks.
>>>> · The layout of vectors x, bounds on x, and the gradient is the same.
>>>> · The row layout of the equality/inequality Jacobian is the same as the equality/inequality constraint vector layout.
>>>> · The column layout of the equality/inequality Jacobian is the same as that for x.
>>>
>>> Very naive question, but given that I have a single constraint, how do I split a 1 x N matrix column-wise? I thought it was not possible.
>>>
>>> Thanks,
>>> Pierre
>>>
>>>> · The row and column layout for the Hessian is the same as x.
>>>>
>>>> The tutorial example ex1 is extremely small (only 2 variables), so its implementation is very simplistic. I think, in parallel, it ships off constraints etc. to rank 0. It’s not an ideal example w.r.t. demonstrating a parallel implementation. We aim to add more examples as we develop PDIPM. If you have an example to contribute, we would most welcome it and provide help on adding it.
>>>>
>>>> Thanks,
>>>> Shri
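As a rough illustration of the layout Shri describes above (single equality constraint with a 1 x N Jacobian), a minimal sketch might look like the following; the helper and all object names are placeholders, not part of ex1.c or the TAO API:

  #include <petscmat.h>

  /* Sketch only: the constraint row lives on rank 0, while the Jacobian's column
     layout and the Hessian's row/column layout follow the distribution of x. */
  static PetscErrorCode CreateConstraintLayout(Vec x,Vec *ce,Mat *Jce,Mat *H)
  {
    PetscErrorCode ierr;
    PetscInt       nloc,N;
    PetscMPIInt    rank;
    MPI_Comm       comm;

    PetscFunctionBegin;
    ierr = PetscObjectGetComm((PetscObject)x,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
    ierr = VecGetLocalSize(x,&nloc);CHKERRQ(ierr);
    ierr = VecGetSize(x,&N);CHKERRQ(ierr);

    /* equality constraint vector: local size 1 on rank 0, 0 elsewhere */
    ierr = VecCreate(comm,ce);CHKERRQ(ierr);
    ierr = VecSetSizes(*ce,rank ? 0 : 1,1);CHKERRQ(ierr);
    ierr = VecSetFromOptions(*ce);CHKERRQ(ierr);

    /* 1 x N Jacobian: row layout matches ce, column layout matches x */
    ierr = MatCreate(comm,Jce);CHKERRQ(ierr);
    ierr = MatSetSizes(*Jce,rank ? 0 : 1,nloc,1,N);CHKERRQ(ierr);
    ierr = MatSetFromOptions(*Jce);CHKERRQ(ierr);
    ierr = MatSetUp(*Jce);CHKERRQ(ierr);

    /* Hessian: square, rows and columns both distributed like x */
    ierr = MatCreate(comm,H);CHKERRQ(ierr);
    ierr = MatSetSizes(*H,nloc,nloc,N,N);CHKERRQ(ierr);
    ierr = MatSetFromOptions(*H);CHKERRQ(ierr);
    ierr = MatSetUp(*H);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }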
>>>> From: petsc-dev <petsc-dev-boun...@mcs.anl.gov> on behalf of Pierre Jolivet <pierre.joli...@enseeiht.fr>
>>>> Date: Monday, September 14, 2020 at 1:52 PM
>>>> To: PETSc Development <petsc-dev@mcs.anl.gov>
>>>> Subject: [petsc-dev] PDIPDM questions
>>>>
>>>> Hello,
>>>> In my quest to help users migrate from Ipopt to Tao, I’ve a new question.
>>>> When looking at src/tao/constrained/tutorials/ex1.c, it seems that almost everything is centralized on rank 0 (local sizes are 0 except on rank 0).
>>>> I’d like to have my Hessian distributed more naturally, as in (almost?) all other SNES/TS examples, but still keep the Jacobian of my equality constraint, which is of dimension 1 x N (N >> 1), centralized on rank 0.
>>>> Is this possible?
>>>> If not, is it possible to supply the transpose of the Jacobian, of dimension N x 1, which could then be distributed row-wise like the Hessian?
>>>> Or maybe use some trick to distribute a MatAIJ/MatDense of dimension 1 x N column-wise? Use a MatNest with as many blocks as processes?
>>>>
>>>> So, just to sum up, how can I have a distributed Hessian with a Jacobian with a single row?
>>>>
>>>> Thanks in advance for your help,
>>>> Pierre