I got a task to write a Strassen-Winograd algorithm in C++. I have written it twice, but the first version of my code doesn't work. The result is correct in the lower left corner of result matrix. And my second version is running slower than naive algorithm, even with N = 64+.
What am I doing wrong?
Important note: I'm not allowed to use dynamic matrix in recursion and structures. In addition, it is better to do the multiplication without copying, using the coordinates of the corner element of the submatrices.
Incorrect:
#include "pch.h"
#include<iostream>
#include<cstdio>
#include<conio.h>
#include<cstdlib>
#include<cmath>
#include<ctime>
#pragma comment(linker, "/STACK:5813243000")
using namespace std;
const int sizs = 256;
void vivod(int matrix[][256], int n);
void Matrix_Add(int a[][256], int b[][256], int c[][256], int n, int x1, int y1, int x2, int y2);
void Matrix_Sub(int a[][256], int b[][256], int c[][256], int n, int x1, int y1, int x2, int y2);
void Matrix_Multiply(int a[][256], int b[][256], int c[][256], int x1, int y1, int x2, int y2, int n);
void strassen(int a[][256], int b[][256], int c[][256], int m, int n, int x1, int y1, int x2, int y2);
void Naive_Multiply(int a[][256], int b[][256], int c[][256], int n)
{
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
c[i][j] = 0;
for (int t = 0; t < n; t++) {
c[i][j] = c[i][j] + a[i][t] * b[t][j];
}
}
}
}
void Multiply(int a[][256], int b[][256], int c[][256], int x1, int y1, int x2, int y2)
{
for (int i = 0; i < 2; i++) {
for (int j = 0; j < 2; j++) {
c[i][j] = 0;
for (int t = 0; t < 2; t++) {
c[i][j] = c[i][j] + a[i][t] * b[t][j];
}
}
}
}
int main()
{
setlocale(LC_ALL, "Russian");
int n;
cout << "Введите число n:";
cin >> n;
const int m = 256;
int A[m][m];
int B[m][m];
int C[m][m];
int k[m][m];
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
A[i][j] = 0;
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
B[i][j] = 0;
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
A[i][j] = rand() % 10;
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
B[i][j] = rand() % 10;
cout << "First Matrix:" << endl;
vivod(A, n);
cout << "Second Matrix:" << endl;
vivod(B, n);
int begin = clock();
//for (int i =0; i < 100; i++)
Naive_Multiply(A, B, k, n);
int end = clock();
cout << "Naive Multiply time: " << end - begin << endl;
vivod(k, n);
int begin2 = clock();
//for (int i = 0; i < 100; i++)
strassen(A, B, C, n, n, 0, 0, 0, 0);
int end2 = clock();
cout << "time: " << end2 - begin2 << endl;
vivod(C, n);
system("pause");
return 0;
}
void strassen(int a[][256], int b[][256], int c[][256], int m, int n, int x1, int y1, int x2, int y2) {
m = n / 2;
if (m != 1)
{
int s1[sizs][sizs];
int s2[sizs][sizs];
int s3[sizs][sizs];
int s4[sizs][sizs];
int s5[sizs][sizs];
int s6[sizs][sizs];
int s7[sizs][sizs];
int s8[sizs][sizs];
int m1[sizs][sizs];
int m2[sizs][sizs];
int m3[sizs][sizs];
int m4[sizs][sizs];
int m5[sizs][sizs];
int m6[sizs][sizs];
int m7[sizs][sizs];
int t1[sizs][sizs];
int t2[sizs][sizs];
int c11[sizs][sizs];
int c12[sizs][sizs];
int c21[sizs][sizs];
int c22[sizs][sizs];
Matrix_Add(a, a, s1, m, n - m, n - 2 * m, n - m, n - m);
Matrix_Sub(s1, a, s2, m, 0, 0, n - 2 * m, n - 2 * m);
Matrix_Sub(a, a, s3, m, n - 2 * m, n - 2 * m, n - m, n - 2 * m);
Matrix_Sub(a, s2, s4, m, n - 2 * m, n - m, 0, 0);
Matrix_Sub(b, b, s5, m, n - 2 * m, n - m, n - 2 * m, n - 2 * m);
Matrix_Sub(b, s5, s6, m, n - m, n - m, 0, 0);
Matrix_Sub(b, b, s7, m, n - m, n - m, n - 2 * m, n - m);
Matrix_Sub(s6, b, s8, m, 0, 0, n - m, n - 2 * m);
strassen(s2, s6, m1, m, m, 0, 0, 0, 0);
strassen(a, b, m2, m, m, n - 2 * m, n - 2 * m, n - 2 * m, n - 2 * m);
strassen(a, b, m3, m, m, n - 2 * m, n - m, n - m, n - 2 * m);
strassen(s3, s7, m4, m, m, 0, 0, 0, 0);
strassen(s1, s5, m5, m, m, 0, 0, 0, 0);
strassen(s4, b, m6, m, m, 0, 0, n - m, n - m);
strassen(a, s8, m7, m, m, n - m, n - m, 0, 0);
Matrix_Add(m1, m2, t1, m, 0, 0, 0, 0);
Matrix_Add(t1, m4, t2, m, 0, 0, 0, 0);
Matrix_Add(m2, m3, c11, m, 0, 0, 0, 0);
Matrix_Sub(t2, m7, c21, m, 0, 0, 0, 0);
Matrix_Add(t1, m5, c12, m, 0, 0, 0, 0);
Matrix_Add(c12, m6, c12, m, 0, 0, 0, 0);
Matrix_Add(t2, m5, c22, m, 0, 0, 0, 0);
for (int i = 0; i < n / 2; i++)
{
for (int j = 0; j < n / 2; j++)
{
c[i + n - 2 * m][j + n - 2 * m] = c11[i][j];
c[i + n - 2 * m][j + n - m] = c12[i][j];
c[i + n - m][j + n - 2 * m] = c21[i][j];
c[i + n - m][j + n - m] = c22[i][j];
}
}
}
else
{
Matrix_Multiply(a, b, c, x1, y1, x2, y2, n);
}
}
void vivod(int matrix[][256], int n)
{
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
cout << matrix[i][j] << " ";
}
cout << endl;
}
cout << endl;
}
void Matrix_Add(int a[][256], int b[][256], int c[][256], int n, int x1, int y1, int x2, int y2)
{
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
c[i][j] = a[i + x1][j + y1] + b[i + x2][j + y2];
}
}
}
void Matrix_Sub(int a[][256], int b[][256], int c[][256], int n, int x1, int y1, int x2, int y2)
{
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
c[i][j] = a[i + x1][j + y1] - b[i + x2][j + y2];
}
}
}
void Matrix_Multiply(int a[][256], int b[][256], int c[][256], int x1, int y1, int x2, int y2, int n)
{
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
c[i][j] = 0;
for (int t = 0; t < n; t++) {
c[i][j] = c[i][j] + a[x1 + i][y1 + t] * b[x2 + t][y2 + j];
}
}
}
}
Slow, deadline default 1:
#include "pch.h"
#include <stdio.h>
#include <iostream>
#include<cstdlib>
#include<ctime>
#pragma comment(linker, "/STACK:44333338")
int deadline;
using namespace std;
const int n = 16;
void StrVin(int(&A)[n][n], int(&B)[n][n], int(&C)[n][n], int size);
void add(int(&A)[n][n], int(&B)[n][n], int(&C)[n][n], int size);
void sub(int(&A)[n][n], int(&B)[n][n], int(&C)[n][n], int size);
void Multiply(int a[n][n], int b[n][n], int c[n][n]);
void vivod(int a[n][n]);
void naivemultiplication(int a[n][n], int b[n][n], int c[n][n], int size);
int main()
{
setlocale(LC_ALL, "Russian");
cout << "Введите предел: ";
cin >> deadline;
int a[n][n];
int b[n][n];
int c[n][n];
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
a[i][j] = rand() % 10;
b[i][j] = rand() % 10;
c[i][j] = 0;
}
}
cout << "First Matrix" << endl;
//vivod(a);
cout << "Second Matrix" << endl;
//vivod(b);
cout << "Strassen-Winograd ";
long double begin2 = clock();
StrVin(a, b, c, n);
long double end2 = clock();
cout << "Time: " << (end2 - begin2) / CLOCKS_PER_SEC << endl;
//vivod(c);
cout << "Standart method ";
long double begin1 = clock();
naivemultiplication(a, b, c, n);
long double end1 = clock();
cout << "Time: " << (end1 - begin1) / CLOCKS_PER_SEC << endl;
//vivod(c);
system("pause");
return 0;
}
void vivod(int a[n][n])
{
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
cout << a[i][j] << " ";
cout << endl;
}
cout << endl;
}
void add(int(&A)[n][n], int(&B)[n][n], int(&C)[n][n], int size)
{
for (int i = 0; i < size; i++)
{
for (int j = 0; j < size; j++)
{
C[i][j] = A[i][j] + B[i][j];
}
}
}
void sub(int(&A)[n][n], int(&B)[n][n], int(&C)[n][n], int size)
{
for (int i = 0; i < size; i++)
{
for (int j = 0; j < size; j++)
{
C[i][j] = A[i][j] - B[i][j];
}
}
}
void Multiply(int a[n][n], int b[n][n], int c[n][n])
{
for (int i = 0; i < deadline*2; i++) {
for (int j = 0; j < deadline*2; j++) {
c[i][j] = 0;
for (int t = 0; t < deadline*2; t++) {
c[i][j] = c[i][j] + a[i][t] * b[t][j];
}
}
}
}
void StrVin(int(&A)[n][n], int(&B)[n][n], int(&C)[n][n], int size)
{
int half = size / 2;
if (half != deadline)
{
int A11[n][n], A12[n][n], A21[n][n], A22[n][n], B11[n][n], B12[n][n], B21[n][n], B22[n][n], C11[n][n], C12[n][n], C21[n][n], C22[n][n];
int S1[n][n], S2[n][n], S3[n][n], S4[n][n], S5[n][n], S6[n][n], S7[n][n], S8[n][n];
int P1[n][n], P2[n][n], P3[n][n], P4[n][n], P5[n][n], P6[n][n], P7[n][n];
int T1[n][n], T2[n][n];
int K[n][n];
for (int i = 0; i < half; i++) {
for (int j = 0; j < half; j++) {
A11[i][j] = A[i][j];
A12[i][j] = A[i][j + half];
A21[i][j] = A[i + half][j];
A22[i][j] = A[i + half][j + half];
B11[i][j] = B[i][j];
B12[i][j] = B[i][j + half];
B21[i][j] = B[i + half][j];
B22[i][j] = B[i + half][j + half];
}
}
for (int i = 0; i < half; i++)
{
for (int j = 0; j < half; j++)
{
C11[i][j] = C[i][j];
}
}
for (int i = 0; i < half; i++)
{
for (int j = half; j < size; j++)
{
C12[i][j - half] = C[i][j];
}
}
for (int i = half; i < size; i++)
{
for (int j = 0; j < half; j++)
{
C21[i - half][j] = C[i][j];
}
}
for (int i = half; i < size; i++)
{
for (int j = half; j < size; j++)
{
C22[i - half][j - half] = C[i][j];
}
}
add(A21, A22, S1, half);
sub(S1, A11, S2, half);
sub(A11, A21, S3, half);
sub(A12, S2, S4, half);
sub(B12, B11, S5, half);
sub(B22, S5, S6, half);
sub(B22, B12, S7, half);
sub(S6, B21, S8, half);
StrVin(S2, S6, P1, half);
StrVin(A11, B11, P2, half);
StrVin(A12, B21, P3, half);
StrVin(S3, S7, P4, half);
StrVin(S1, S5, P5, half);
StrVin(S4, B22, P6, half);
StrVin(A22, S8, P7, half);
add(P1, P2, T1, half);
add(T1, P4, T2, half);
add(P2, P3, C11, half);
add(T1, P5, K, half);
add(K, P6, C12, half);
sub(T2, P7, C21, half);
add(T2, P5, C22, half);
for (int i = 0; i < half; i++)
{
for (int j = 0; j < half; j++)
{
C[i][j] = C11[i][j];
C[i][j + half] = C12[i][j];
C[i + half][j] = C21[i][j];
C[i + half][j + half] = C22[i][j];
}
}
}
else
{
Multiply(A, B, C);
}
}
void naivemultiplication(int a[n][n], int b[n][n], int c[n][n], int size)
{
for (int k = 0; k < size; k++)
{
for (int i = 0; i < size; i++)
{
c[k][i] = 0;
for (int j = 0; j < size; j++)
{
c[k][i] += a[k][j] * b[j][i];
}
}
}
}