1

I'm trying to write a super simple C program of the vector multiply-add "axpy" algorithm for integer data types. The program output the execution time to measure the performance of a machine. The matrices are filled by random numbers.

int benchmark(void) {
    int N;      /* The matrix size, controlled by user input */
    int r, c;   /* Row and Column number */
    int random; /* Random number to fill the matix */
    int a = rand() % 20; /* Scale number to multiply x matrix */

    printf("Enter the size(N*N) of the matrices(Maximum 1,000,000)\n");
    scanf("%d", &N);

    if (N > 1000000) {
        fprintf(stderr, "Size of matrix is too large!\n");
        return 0;
    }

    /* Initialize and fill the matrix x and y */
    int xMatrix[N][N], yMatrix[N][N], resultMatrix[N][N];

    /* Compute time */
    clock_t t;

    t = clock();

    for (r = 0; r < N; r++) {
        for (c = 0; c < N; c++) {
            random = rand() % 100;
            xMatrix[r][c] = a * random; /* Multiply matrix x with random value a */
        }
    }
    for (r = 0; r < N; r++) {
        for (c = 0; c < N; c++) {
            int random = rand() % 100;
            yMatrix[r][c] = random;
        }
    }

    /* Add two matrix together */
    for (r = 0; r < N; r++) {
        for (c = 0; c < N; c++) {
            resultMatrix[r][c] = xMatrix[r][c] + yMatrix[r][c];
        }
    }

    t = clock() - t;

    double timeTaken = ((double)t) / CLOCKS_PER_SEC;
    printf("\n -> Total time : %f seconds\n", timeTaken);
    printf("\n -> Vector length : %d", N * N);

}

User controls the size of the matrix. The program works fine when the value of N is less than 800.

chqrlie
  • 131,814
  • 10
  • 121
  • 189
Semicolon
  • 131
  • 1
  • 10
  • 1
    You're probably running out of stack. Consider dynamically allocating the arrays with malloc. – Retired Ninja Oct 07 '17 at 02:28
  • 800*800=640,000. x4 (size of an int) = 3,200,000. That's a **lot** of memory to allocate as a local variable, it probably is failing due to a lack of stack space. Put it on the heap instead. – Loren Pechtel Oct 07 '17 at 02:34

2 Answers2

3

The size of the objects allocated with automatic storage (on the stack) is too large, you get undefined behavior, more specifically a Stack overflow.

You should instead allocate the objects from the heap:

    /* Initialize and fill the matix x and y */
    int (*xMatrix)[N] = malloc(N * sizeof(*xMatrix));
    int (*yMatrix)[N] = malloc(N * sizeof(*yMatrix));
    int (*resultMatrix)[N] = malloc(N * sizeof(*resultMatrix));

And verify that none of the pointers returned by malloc() are NULL.

Here is the modified code:

int benchmark(void) {
    int N;       /* The matrix size, controlled by user input */
    int r, c;    /* Row and Column number */
    int random;  /* Random number to fill the matix */
    int a = rand() % 20; /* Scale number to multiply x matrix */

    printf("Enter the size(N*N) of the matrices (Maximum 1,000,000)\n");
    if (scanf("%d", &N) != 1) {
        fprintf(stderr, "Input error!\n");
        return 0;
    }

    if (N > 1000000) {
        fprintf(stderr, "Matrix size is too large!\n");
        return 0;
    }

    /* Initialize and fill the matrix x and y */
    int (*xMatrix)[N] = malloc(N * sizeof(*xMatrix));
    int (*yMatrix)[N] = malloc(N * sizeof(*yMatrix));
    int (*resultMatrix)[N] = malloc(N * sizeof(*resultMatrix));

    if (xMatrix == NULL || yMatrix == NULL || resultMatrix == NULL) {
        fprintf(stderr, "Memory allocation failed!\n");
        free(xMatrix);
        free(yMatrix);
        free(resultMatrix);
        return 0;
    }

    /* Compute time */
    clock_t t = clock();

    for (r = 0; r < N; r++) {
        for (c = 0; c < N; c++) {
            random = rand() % 100;
            xMatrix[r][c] = a * random; /* Multiply matrix x with random value a */
        }
    }
    for (r = 0; r < N; r++) {
        for (c = 0; c < N; c++) {
            random = rand() % 100;
            yMatrix[r][c] = random;
        }
    }

    /* Add two matrix together */
    for (r = 0; r < N; r++) {
        for (c = 0; c < N; c++) {
            resultMatrix[r][c] = xMatrix[r][c] + yMatrix[r][c];
        }
    }

    t = clock() - t;

    double timeTaken = ((double)t) / CLOCKS_PER_SEC;
    printf("\n -> Total time : %f seconds\n", timeTaken);
    printf("\n -> Vector length : %lld", (long long)N * N);

    free(xMatrix);
    free(yMatrix);
    free(resultMatrix);
    return 0;
}

Note however that your computation is very simple, most of the time is likely spent in the rand() function.

Employed Russian
  • 199,314
  • 34
  • 295
  • 362
chqrlie
  • 131,814
  • 10
  • 121
  • 189
  • lol cant believe most time wasted on rand() – Semicolon Oct 07 '17 at 03:09
  • by the way, `*xMatrix[r] = (int *) malloc(N * sizeof(int))` will cause `error: incompatible types when assigning to type ‘int[(sizetype)(N)]’ from type ‘int *’ ` – Semicolon Oct 07 '17 at 03:19
  • @zuolizhu: of course, but there is no such code in my post. `xMatrix` is a pointer to a 2D array of `N` by `N` `int`s, allocated with a single call to `malloc()`. – chqrlie Oct 07 '17 at 11:49
  • oh yes, that make sense! Thanks a lot! – Semicolon Oct 07 '17 at 14:36
1

You are trying to allocate memmory dynamically, I would recommend using malloc from stdlib.h as shown bellow.

Also, check out these SO posts: memory allocation in Stack and Heap, and What and where are the stack and heap?

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

int benchmark(void) {
    int N;  /* The matrix size, controlled by user input */
    int r, c; /* Row and Column number */
    int random; /* Random number to fill the matix */
    int a = rand() % 20; /* Scale number to multiply x matrix */

    printf("Enter the size(N*N) of the matrixs(Maximum 1,000,000)\n");
    scanf("%d", &N);

    if(N > 1000000) {
        fprintf(stderr, "Size of matrix is too large!\n");
        return 0;
    }

    /* Initialize and fill the matix x and y */
    int** xMatrix = NULL;
    int** yMatrix = NULL;
    int** resultMatrix = NULL;

    /* Using the heap memory allocation instead of the stack */
    xMatrix = (int **) malloc(N * sizeof(int *));
    yMatrix = (int **) malloc(N * sizeof(int *));
    resultMatrix = (int **) malloc(N * sizeof(int *));
    for (r = 0; r < N; r++) {
        xMatrix[r] = (int *) malloc(N * sizeof(int));
        yMatrix[r] = (int *) malloc(N * sizeof(int));
        resultMatrix[r] = (int *) malloc(N * sizeof(int));
    }

    /* Compute time */
    clock_t t;

    t = clock();

    for (r = 0; r < N; r++) {
        for (c = 0; c < N; c++) {
            random = rand() % 100;
            xMatrix[r][c] = a * random; /* Multiply matix x with random value a */
        }
    }
    for (r = 0; r < N; r++) {
        for (c = 0; c < N; c++) {
            int random = rand() % 100;
            yMatrix[r][c] = random;
        }
    }

    /* Add two matrix together */
    for (r = 0; r < N; r++) {
        for (c = 0; c < N; c++) {
            resultMatrix[r][c] = xMatrix[r][c] + yMatrix[r][c];
        }
    }

    t = clock() - t;

    double timeTaken = ((double)t)/CLOCKS_PER_SEC;
    printf("\n -> Total time : %f seconds\n", timeTaken);
    printf("\n -> Vector length : %d", N*N);

    /* Always remember to free your allocated memory */
    for (r = 0; r < N; r++) {
        free(xMatrix[r]);
        free(yMatrix[r]);
        free(resultMatrix[r]);
    }
    free(xMatrix);
    free(yMatrix);
    free(resultMatrix);

}

int main() {
    benchmark();
    return 0;
}
Bernardo Duarte
  • 4,074
  • 4
  • 19
  • 34