[OMPI users] Problem with implementation of Foxa algorithm

Surivinta Surivinta Wed, 23 Sep 2015 19:13:10 -0400 (EDT)

Hi everybody!
I try implement Fox algorithm via mpi, but got some errors (see below)
Can someone explain how fix it or give a way for search.
Source code attached to letter


errors:
[estri_mobile:6337] *** An error occurred in MPI_Gather
[estri_mobile:6337] *** reported by process [1826816001,0]
[estri_mobile:6337] *** on communicator MPI COMMUNICATOR 4 SPLIT FROM 3
[estri_mobile:6337] *** MPI_ERR_COUNT: invalid count argument
[estri_mobile:6337] *** MPI_ERRORS_ARE_FATAL (processes in this
communicator will now abort,
[estri_mobile:6337] ***    and potentially your MPI job)


-- 
С уважением.

#include <stdio.h>
#include <mpi.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <math.h>
#include "somehead.h"


int size; // numb of CPU
int rank; // numb of current proc
int gridSz; // greed size (must be size*size)
int gridCoord[2]; // coord proc in grid
double *aMatrix; // 
double *bMatrix; // 
double *cMatrix; // 
double *aMatrixBlock; // block matrix for buffer A
double *aBufProc; // matix A in current proc
double *bBufProc; // matix B in current proc
double *cBufProc; // matix C in current proc

static MPI_Comm gridComm; // 
static MPI_Comm rowComm;  // 
static MPI_Comm colComm;  // 
///////////////////////////////////////////////////////////////////////////
// init data in matrices
void
dataInit(double* aMatrix, double* bMatrix, int matrixSize)
{
    int value = 1;
    uint i, j;
    srand(value);
    for (i = 0; i < matrixSize; i++){
        for (j = 0; j < matrixSize; j++){
            aMatrix[i * matrixSize+j] = 1.0 + rand() % 5;
            bMatrix[i * matrixSize+j] = 1.0 + rand() % 7;
        }
    }
}
///////////////////////////////////////////////////////////////////////////////////
// create comm for 2d grid coord
// define coord of proc in current grid
// make comm for row and column (MPI_Cart_create)
void
gridCommCr()
{
    int dimSize[2]; // for carry numb of proc in gridколичество процессов в каждой размерности сетки
    int period[2]; // 1 - periodicaly size, 0 - not
    int subDim[2]; // 1 - if dimension must be in subgrid else - 0
    dimSize[0] = gridSz;
    dimSize[1] = gridSz;
    period[0] = 0;
    period[1] = 0;
    MPI_Dims_create(size, 2, dimSize);
    MPI_Cart_create(MPI_COMM_WORLD, 2, dimSize, period, 1, &gridComm);
    MPI_Cart_coords(gridComm, rank, 2, gridCoord);

    subDim[0] = 0; // 
    subDim[1] = 1; // 
    MPI_Cart_sub(gridComm, subDim, &rowComm); // 

    subDim[0] = 1; // 
    subDim[1] = 0; // 
    MPI_Cart_sub(gridComm, subDim, &colComm); // 
    printf("Comm created!");
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void
printMa(double* curMatrix, int numbRow, int numbCol)
{
    uint i, j;
    for (i = 0; i < numbRow; i++){
        for (j = 0; j < numbCol; j++){
            printf("%7.4f ", curMatrix[i*numbRow+j]);
        }
    printf("\n");
    }
}
//////////////////////////////////////////////////////////////////////////////////////////////
void
delivData(double* aMatrix, double* bMatrix, double* aMatrixBlock, double* bBufProc, int matrixSize, int blockSize)
{
    matrixScatter(aMatrix, aMatrixBlock, matrixSize, blockSize);
    matrixScatter(bMatrix, bBufProc, matrixSize, blockSize);
}

void
matrixScatter(double* curMatrix, double* curBufBlock, int maSize, int blockSize)
{
    uint i;
    double * tempMaRow = (double*) malloc((blockSize*maSize)*sizeof(double)); // 
    if (gridCoord[1] == 0){
        MPI_Scatter(curMatrix, blockSize*maSize, MPI_DOUBLE, tempMaRow, blockSize*maSize, MPI_DOUBLE, 0, colComm);
    }
    for (i = 0; i < blockSize; i++){
        MPI_Scatter(&tempMaRow[i*maSize], blockSize, MPI_DOUBLE,&(curBufBlock[i*blockSize]), blockSize, MPI_DOUBLE, 0, rowComm);
    }
    free (tempMaRow);
}


///////////////////////////////////////////////////////////////////////////////////////////////
void
calcParal(double* aMatrix, double* aMatrixBlock, double* bBufProc, double* cBufProc, int blockSize)
{
    int iter;
    for (iter = 0; iter < gridSz; iter++){
        blockAbroadcast(iter, aMatrix, aMatrixBlock, blockSize);
        blMulti(aMatrix, bBufProc, cBufProc, blockSize);
        bBlSendRecv(bBufProc, blockSize);
    }
}

void
blockAbroadcast(int iter, double* aBufProc, double* aMatrixBlock, int blockSize)
{
    uint i;
    int tmpVar = (gridCoord[0] + iter) % gridSz;
    if (gridCoord[1] == tmpVar){
        for(i = 0; i < blockSize*blockSize; i++){
            aBufProc[i] = aMatrixBlock[i];
        }
    }
    MPI_Bcast(aBufProc, blockSize*blockSize, MPI_DOUBLE, tmpVar, rowComm);
}

void
bBlSendRecv(double* bBufProc, int blockSize)
{
    MPI_Status status;
    int nextProc = gridCoord[0] + 1;
    if (gridCoord[0] == gridSz - 1){
        nextProc = 0;
    }
    int pervProc = gridCoord[0] - 1;
    if(gridCoord[0] == 0){
        pervProc = gridSz -1;
    }
    MPI_Sendrecv_replace( bBufProc, blockSize*blockSize, MPI_DOUBLE, nextProc, 0, pervProc, 0, colComm, &status);
}

void
blMulti(double* aBlock, double* bBlock, double* cBlock, int matrixSize)
{
    serialCalc(aBlock, bBlock, cBlock, matrixSize);
}

void
serialCalc(double* aMatrix, double* bMatrix, double* cMatrix, int matrixSize)
{
    uint i, j, k;
    for (i=0; i<matrixSize; i++){
        for (j=0; j<matrixSize; j++){
            for (k=0; k<matrixSize; k++)
            {
                cMatrix[i*matrixSize+j] += aMatrix[i*matrixSize+k]*bMatrix[k*matrixSize+j];
            }
        }
    }
}


//////////////////////////////////////////////////////////////////////////////
void
collect(double* cMatrix, double* cBufProc,int matrixSize, int blockSize)
{
    int *tmpVar;
    tmpVar = malloc((matrixSize*blockSize)*sizeof(uint));
    uint i;
    for (i = 0; i < blockSize; i++)    {
        MPI_Gather(&cBufProc[i*matrixSize], blockSize, MPI_DOUBLE, 0, tmpVar[i*matrixSize], MPI_DOUBLE, 0, rowComm);
    }
    if (gridCoord[1] == 0){
        MPI_Gather(tmpVar, blockSize * matrixSize, MPI_DOUBLE, cMatrix, blockSize*matrixSize, MPI_DOUBLE, 0, colComm);
    }
    free(tmpVar);
}


//////////////////////////////////////////////////////////////////////////////////////////
void
blockPrint (double* bufBlock, int blockSize, char str[])
{
    uint i;
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank == 0){
        printf("%s \n", str);
    }
    for (i=0; i<size; i++){
        if (rank == i){
            printf ("ProcRank = %d \n", rank);
            printMa(bufBlock, blockSize, blockSize);
        }
        MPI_Barrier(MPI_COMM_WORLD);
    }
}


/////////////////////////////////////////////////////////////////////////////
int
main(int argc, char** argv)
{
    int matrixSize = 4; // 
    int blockSize; //

    uint i;

    //double start, end; // reserved

    //setvbuf(stdout, 0, _IONBF, 0); // buffer off

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    gridSz = sqrt((double) size);
    blockSize = matrixSize / gridSz;
    aBufProc = (double*) malloc((blockSize*blockSize)*sizeof(double));
    bBufProc = (double*) malloc((blockSize*blockSize)*sizeof(double));
    cBufProc = (double*) malloc((blockSize*blockSize)*sizeof(double));
    aMatrixBlock = (double*) malloc((blockSize*blockSize)*sizeof(double));
    if (rank == 0){
        if (size != gridSz*gridSz){
            printf("Grid size must be = countCPU*countCPU\n");
            MPI_Abort(MPI_COMM_WORLD, 911);
            MPI_Finalize();
            return 0; // i know it bad but ...
        }
    }
    aMatrix = (double*) malloc((matrixSize*matrixSize)*sizeof(double));
    bMatrix = (double*) malloc((matrixSize*matrixSize)*sizeof(double));
    cMatrix = (double*) malloc((matrixSize*matrixSize)*sizeof(double));

    gridCommCr();
    dataInit(aMatrix, bMatrix, matrixSize);
    MPI_Bcast(&matrixSize, 1, MPI_INT, 0, MPI_COMM_WORLD);
    for (i=0; i < blockSize*blockSize; i++)
        cBufProc[i] = 0.0;

    delivData(aMatrix, bMatrix, aMatrixBlock, bBufProc, matrixSize, blockSize);
    calcParal(aMatrix, aMatrixBlock, bBufProc, cBufProc, blockSize);
    collect(cMatrix, cBufProc, matrixSize, blockSize);

    printf("Matrix A:\n");
    printMa(aMatrix, matrixSize, matrixSize);
    printf("Matrix B:\n");
    printMa(bMatrix, matrixSize, matrixSize);
    printf("Matrix C:\n");
    printMa(cMatrix, matrixSize, matrixSize);
    
    MPI_Finalize();
    return 0;
}

 #ifndef __SOMEHEAD_H_
 #define __SOMEHEAD_H_

typedef unsigned int uint;

void gridCommCr();
void dataInit(double* aMatrix, double* bMatrix, int matrixSize);
void printMa(double* curMatrix, int numbRow, int numbCol);
void matrixScatter(double* curMatrix, double* curBufBlock, int maSize, int blockSize);
void delivData(double* aMatrix, double* bMatrix, double* aMatrixBlock, double* bBufProc, int matrixSize, int blockSize);
void bBlSendRecv(double* bBufProc, int blockSize);
void calcParal(double* aMatrix, double* aMatrixBlock, double* bBufProc, double* cBufProc, int blockSize);
void collect(double* cMatrix, double* cBufProc,int matrixSize, int blockSize);
void blockAbroadcast(int iter, double* aBufProc, double* aMatrixBlock, int blockSize);
void blMulti(double* aBlock, double* bBlock, double* cBlock, int matrixSize);
void serialCalc(double* aMatrix, double* bMatrix, double* cMatrix, int matrixSize);
void blockPrint (double* bufBlock, int blockSize, char str[]);

#endif // __SOMEHEAD_H__

[OMPI users] Problem with implementation of Foxa algorithm

Reply via email to