I'm working on an implementation of the approach described in the wiki article for the in-place cache-oblivious transposition of a square Matrix.
https://en.wikipedia.org/wiki/In-place_matrix_transposition
The algorithm basically recursively splits the Matrix into four, then transposes the quadrants which are along the diagonal and swaps the ones that are above and below it. The actual transposition/swapping only occurs if the matrix is of size 2*2 or lower, otherwise, it is split again.
I've split it into three functions:
This initiates the procedure for a given size N:
void SmartTranspose(int A[row][col]) {
Transpose(A, 0, 0, N, N);
}
Then:
void Transpose(int A[row][col], int x, int y, int w, int h) {
int Temp;
if ((w - x) * (h - y) <= 4){
for (int row1 = x ; row1 < w -1 ; row1++)
for (int col1 = y + 1 ; col1 < h ; col1++) {
Temp = A[row1][col1];
printf("transp: %d %d\n", A[row1][col1], A[col1] [row1]);
A[row1][col1] = A[col1][row1];
A[col1][row1] = Temp;
}
}
else {
int halfh = h / 2;
int halfw = w / 2;
Transpose(A, x, y, halfw , halfh);
Transpose(A, x + halfw, y + halfh, w , h);
TransposeSwap(A, x + halfw, y, w, halfh, x, y + halfh, halfw , h);
}
}
And finally:
void TransposeSwap(int A[row][col], int x, int y, int w, int h,int x1, int y1, int w1, int h1) {
int Temp; int row2 = x1; int col2 = y1;
if ((w - x) * (h - y) <= 4 && (w1 - x1) * (h1 - y1) <= 4) {
for(row1 = x; row1 < w; row1++)
for(col1 = y; col1 < h; col1++)
{
Temp = A[row1][col1] ;
A[row1][col1] = A[col1][row1];
A[col1][row1] = Temp;
}
}
else {
printf("RECURSE");
int halfh = h / 2;
int halfw = w / 2;
int halfh1 = h1 / 2;
int halfw1 = w1 / 2;
TransposeSwap(A, x, y, halfw, halfh, x1, y1, halfw1, halfh1);
TransposeSwap(A, x + halfw, y, w, h - halfh, x1, y1 + halfh1, halfw1, h1);
TransposeSwap(A, x , y + halfh, halfw, h, x1 + halfw1, y1, w1, halfh1);
TransposeSwap(A, x + halfw, y + halfh, w, h, x1 + halfw1, y1 + halfh1, w1, h1);
}
}
This does not work, however, and I'm struggling to see where my logic goes wrong here.
Edit: Example of Output
Original matrix:
1948037971 40713922 986050715 74181839 943010147 1060710730
18590233 268906808 1966315840 1325423973 398061279 2047858287
513589654 1727398080 2016821685 277200601 1611383116 2000671901
228038281 1863845528 106517081 1934721636 745170263 1736525254
224427632 687572994 1249224754 1497415191 537022734 1443375385
1054092341 337577057 1484089307 2040143056 411758897 279615807
Transposed matrix:
1948037971 18590233 513589654 74181839 943010147 1060710730
40713922 268906808 1727398080 1325423973 398061279 2047858287
986050715 1966315840 2016821685 277200601 1611383116 2000671901
228038281 1863845528 106517081 1934721636 745170263 1736525254
224427632 687572994 1249224754 1497415191 537022734 1443375385
1054092341 337577057 1484089307 2040143056 411758897 279615807
The correct output should be a transposed matrix.
Edit: Main function and declarations:
int row = 40000 , col = 40000;
static int A[40000][40000];
static int N[100] = {0};
void SmartTranspose(int A[row][col]);
void Transpose(int A[row][col], int x, int y, int w, int h);
void InitializeMatrix(int X[row][col]);
void PrintMatrix(int X[row][col]);
double pow(double x, double y);
int matrix = 0;
void TransposeSwap(int A[row][col], int x, int y, int w, int h,int x1, int y1, int w1, int h1);
int main(){
srand(time(NULL));
double sizes = 0;
int count = 0;
for(sizes = 20; sizes < 30; sizes++)
{
N[count] = floor(pow(2, (sizes/9)));
printf("N %d\n", N[count]);
count++; }
for (matrix = 0; matrix <= count -1 ; matrix++){
InitializeMatrix(A);
printf("N %d\n",N[matrix]);
printf("\nOriginal matrix: \n");
SmartTranspose(A);
printf("E\n");
printf("\nTransposed matrix: \n");
PrintMatrix(A);
}
return 0;
}
A link to the complete code: https://jpst.it/QaBq