I don't give all the code since I'm trying to make this part as readible as possible and the rest of the program is not so essential. E
is a unit matrix, A
is an invertible matrix. Function Solve(...)
is called from int main()
and transforms both E
and A
. As a result we should get inverse of A
in E
and unit matrix in A
.
When I run this program with 1 process, it works excellently. When I add another process, the resulting matrices are far from what's needed. It looks like actions are done in wrong order. I've read (for example, here OpenMPI MPI_Barrier problems) that MPI works messy with the output, so adding some output between actions doesn't help to find out what's happening. Plus, since the program works fine for one process, the algorithm is correct, the whole problem is coming from MPI and it shouldn't have appeared if MPI_Barrier(MPI_COMM_WORLD)
had done its job properly.
Does MPI_Barrier(MPI_COMM_WORLD)
synchronize processes here? Why does the program work fine for 1 process and fails for 2?
Thank you.
struct Matrix{
vector<vector<double>> mat;
size_t m; //rows
size_t n; //columns
};
void Solve(Matrix &A, Matrix &E, size_t rank, size_t size)
{
size_t n=A.m;
size_t ind;
for(size_t k=0; k<n; k++)
{
if(rank==0) // initial action are executed by 0th process
{
ind=A.MainElement_Row(k,k);
E.SwapColumns(k,ind);
A.SwapColumns(k,ind);
E.MultColumn(k,1/A.mat[k][k]);
A.MultColumn(k,1/A.mat[k][k]);
}
MPI_Barrier(MPI_COMM_WORLD); // I don't want other processes to go further until 0th did its work
for(size_t i=0; i<n; i++)
MPI_Bcast(&E.mat[i][0], n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
for(size_t i=0; i<n; i++)
MPI_Bcast(&A.mat[i][0], n, MPI_DOUBLE, 0, MPI_COMM_WORLD); // now all processes know how the matrices look
MPI_Barrier(MPI_COMM_WORLD); // Me making sure they all got it
for(size_t i=rank; i<n; i=i+size) // here parallel programming starts. Every process works with its share of columns. i-th process is responsible for changing i-th, (i+size)th, (i+2*size)th etc. columns. Processes' areas of work don't intersect
{
if(i!=k)
{
E.SubtractColumns(i,k,A.mat[k][i]);
A.SubtractColumns(i,k,A.mat[k][i]);
}
}
MPI_Barrier(MPI_COMM_WORLD); // Let's make sure all processes did their job
for(size_t i=0; i<size; i++)
{
for(size_t j=0; j<n; j++)
MPI_Bcast(&E.mat[j][0], n, MPI_DOUBLE, i, MPI_COMM_WORLD);
for(size_t j=0; j<n; j++)
MPI_Bcast(&A.mat[j][0], n, MPI_DOUBLE, i, MPI_COMM_WORLD); // May be it's not the best choice of function, but anyway now all processes broadcasted what they did to the matrix
}
MPI_Barrier(MPI_COMM_WORLD); // we make sure, they all understood how the matrices look now
}
}
(Edit, improved version of code)
void SendColumn(Matrix &A, Matrix &E, size_t root, size_t n, size_t num)
{
for(size_t m=0; m<n; m++)
{
MPI_Bcast(&E.mat[m][num], 1, MPI_DOUBLE, root, MPI_COMM_WORLD);
MPI_Bcast(&A.mat[m][num], 1, MPI_DOUBLE, root, MPI_COMM_WORLD);
}
}
void Solve(Matrix &A, Matrix &E, size_t rank, size_t size)
{
size_t n=A.m;
size_t ind;
for(size_t k=0; k<n; k++)
{
if(rank==0)
{
ind=A.MainElement_Row(k,k);
E.SwapColumns(k,ind);
A.SwapColumns(k,ind);
E.MultColumn(k,1/A.mat[k][k]);
A.MultColumn(k,1/A.mat[k][k]);
}
MPI_Barrier(MPI_COMM_WORLD);
SendColumn(A,E,0,n,k);
SendColumn(A,E,0,n,ind);
MPI_Barrier(MPI_COMM_WORLD);
for(size_t i=rank; i<n; i=i+size)
{
if(i!=k)
{
E.SubtractColumns(i,k,A.mat[k][i]);
A.SubtractColumns(i,k,A.mat[k][i]);
}
}
MPI_Barrier(MPI_COMM_WORLD);
for(size_t root=0; root<size; root++)
{
for(size_t i=root; i<n; i=i+size)
{
if(i!=k)
SendColumn(A,E,root,n,i);
}
}
MPI_Barrier(MPI_COMM_WORLD);
}
}