I would like to ask you for help. I've implemented divide and conquer algorithm in C++. It works fine, but it is really slow. 512x512 matrix is calculated like 90 seconds which is unacceptable compared with 0.4s with naive algorithm. Could you tell me, if, and what I'm doing wrong?
The main divide and conquer function is:
std::vector<std::vector<int>> Matrix::divideAndConquer(std::vector<std::vector<int>>& matrix1,
std::vector<std::vector<int>>& matrix2)
{
if (matrix1.size() == 1) {
return naive(matrix1, matrix2);
}
int newSize = matrix1.size() / 2;
std::vector<int> inner(newSize);
std::vector<std::vector<int>> a11(newSize, inner);
std::vector<std::vector<int>> a12(newSize, inner);
std::vector<std::vector<int>> a21(newSize, inner);
std::vector<std::vector<int>> a22(newSize, inner);
std::vector<std::vector<int>> b11(newSize, inner);
std::vector<std::vector<int>> b12(newSize, inner);
std::vector<std::vector<int>> b21(newSize, inner);
std::vector<std::vector<int>> b22(newSize, inner);
Matrix::divideMatrix(matrix1, a11, a12, a21, a22);
Matrix::divideMatrix(matrix2, b11, b12, b21, b22);
std::vector<std::vector<int>> m1 = divideAndConquer(a11, b11);
std::vector<std::vector<int>> m2 = divideAndConquer(a12, b21);
std::vector<std::vector<int>> m3 = divideAndConquer(a11, b12);
std::vector<std::vector<int>> m4 = divideAndConquer(a12, b22);
std::vector<std::vector<int>> m5 = divideAndConquer(a21, b11);
std::vector<std::vector<int>> m6 = divideAndConquer(a22, b21);
std::vector<std::vector<int>> m7 = divideAndConquer(a21, b12);
std::vector<std::vector<int>> m8 = divideAndConquer(a22, b22);
std::vector<std::vector<int>> c1 = add(m1, m2);
std::vector<std::vector<int>> c2 = add(m3, m4);
std::vector<std::vector<int>> c3 = add(m5, m6);
std::vector<std::vector<int>> c4 = add(m7, m8);
return combine(c1, c2, c3, c4);
}
And then divide matrix function is:
void Matrix::divideMatrix(std::vector<std::vector<int>>& matrix, std::vector<std::vector<int>>& m1,
std::vector<std::vector<int>>& m2, std::vector<std::vector<int>>& m3, std::vector<std::vector<int>>&
m4)
{
for (int i = 0; i < matrix.size() / 2; i++) {
for (int j = 0; j < matrix.size() / 2; j++) {
m1[i][j] = matrix[i][j];
m2[i][j] = matrix[i][j + matrix.size() / 2];
m3[i][j] = matrix[i + (matrix.size() / 2)][j];
m4[i][j] = matrix[i + (matrix.size() / 2)][j + (matrix.size() / 2)];
}
}
}
Combine method is:
std::vector<std::vector<int>> Matrix::combine(std::vector<std::vector<int>>& m1, std::vector<std::vector<int>>& m2, std::vector<std::vector<int>>& m3, std::vector<std::vector<int>>& m4)
{
std::vector<int> inner(m1[0].size() + m2[0].size());
std::vector<std::vector<int>> result(m1.size() + m2.size(), inner);
for (int i = 0; i < m1.size(); i++) {
for (int j = 0; j < m2.size(); j++) {
result[i][j] = m1[i][j];
result[i][j + m1[0].size()] = m2[i][j];
result[i + m1.size()][j] = m3[i][j];
result[i + m1.size()][j + m3[0].size()] = m4[i][j];
}
}
return result;
}
And last add method is:
std::vector<std::vector<int>> Matrix::add(std::vector<std::vector<int>>& matrix1, std::vector<std::vector<int>>& matrix2)
{
std::vector<std::vector<int>> result(matrix1);
for (int i = 0; i < matrix1.size(); i++) {
for (int j = 0; j < matrix1.size(); j++) {
result[i][j] = matrix1[i][j] + matrix2[i][j];
}
}
return result;
}
The naive function just returns multiplication of matrix1[0][0] * matrix2[0][0] in a new vector