2

I wrote the merge sort algorithm in C and have compiled it both locally and remotely. I assume something in the source is causing platform dependence.

#include <stdio.h>
#include <limits.h>
#include <math.h>

void merge(int A[], int p, int q, int r) {
    int n1 = q - p + 1;
    int n2 = r - q;

    int L[n1];
    int R[n2];

    for (int i = 0; i < n1; ++i) {
        L[i] = A[p + i];
    }

    for(int j = 0; j < n2; ++j) {
        R[j] = A[q + j + 1];
    }

    L[n1] = INT_MAX;
    R[n2] = INT_MAX;

    int i = 0;
    int j = 0;

    for (int k = p; k <= r; ++k) {
        if (L[i] <= R[j]) {
            A[k] = L[i];
            i = i + 1;
        } else {
            A[k] = R[j];
            j = j + 1;
        }
    }
}

void merge_recurse(int A[], int p, int r) {
    if (p < r) {
        int q = floor((p + r) / 2);
        merge_recurse(A, p, q);
        merge_recurse(A, q + 1, r);
        merge(A, p, q, r);
    }
}

void merge_sort(int A[], size_t length) {
    merge_recurse(A, 0, (int)length - 1);
}

int main() {
    int length = 9;
    int A[] = { 3, 7, 61, 3, 40, 4, -1, 8, 10 };

    merge_sort(A, length);

    for (int i = 0; i < length; ++i) {
        printf("%i, ", A[i]);
    }

    return 0;
}

When compiled online the correct result is returned.

-1, 3, 3, 4, 7, 8, 10, 40, 61,

However when I compile the source locally on Linux an incorrect result is returned.

-1, 4, 8, 10, 2147483647, 3, 7, 40, 61

What in the source code causes these different results?

chqrlie
  • 131,814
  • 10
  • 121
  • 189
Ivor Denham-Dyson
  • 655
  • 1
  • 5
  • 24

2 Answers2

2

L[n1] = INT_MAX; writes past the end of the array. The declaration int L[n1]; creates an array that can be indexed from 0 to n1-1. There is no L[n1]. Same thing for R[n2] = INT_MAX;

When code writes past the end of an array, it may stomp on another variable in a way that changes the behavior of the code. Or it might not have any observable effect. The online compiler just happened to arrange the variables in memory in a way that nothing bad happened. It's completely unpredictable, and known as undefined behavior.

user3386109
  • 34,287
  • 7
  • 49
  • 68
0

The code has undefined behavior: both L[n1] = INT_MAX; and R[n2] = INT_MAX; write beyond the end of the respective arrays.

Undefined behavior may have no visible effects, which is what you observe on the online compiler or produce incorrect result as you see on your Linux system, or produce catastrophic results, such as a program crash or worse.

Note that your implementation uses a confusing convention and an unsafe method:

  • passing r as the index of the last element is much idiomatic in C than using the index to the following element. With this alternative convention, the length of the slice is simply r - p, the initial call merge_recurse(A, 0, length); and there are no confusing +1/-1 adjustments.
  • using sentinels at the end of the temporary arrays is unsafe because the value INT_MAX may actually be present in the array, causing an incorrect result or defined behavior as index i or j may increase beyond the end of their respective array. This method should not be taught in schools, it is fundamentally flawed. Just test the index boundaries.
  • instead of using int q = floor((p + r) / 2); you should use integer arithmetics:

    int q = p + (r - p) / 2;
    

Here is a modified version:

#include <stdio.h>

void merge(int A[], int p, int q, int r) {
    int n1 = q - p;
    int n2 = r - q;

    int L[n1];
    int R[n2];

    for (int i = 0; i < n1; ++i) {
        L[i] = A[p + i];
    }

    for (int j = 0; j < n2; ++j) {
        R[j] = A[q + j];
    }

    int i = 0;
    int j = 0;

    for (int k = p; k < r; ++k) {
        if (i < n1 && (j >= n2 || L[i] <= R[j])) {
            A[k] = L[i];
            i++;
        } else {
            A[k] = R[j];
            j++;
        }
    }
}

void merge_recurse(int A[], int p, int r) {
    if (r - p > 1) {
        int q = p + (r - p) / 2;
        merge_recurse(A, p, q);
        merge_recurse(A, q, r);
        merge(A, p, q, r);
    }
}

void merge_sort(int A[], size_t length) {
    merge_recurse(A, 0, length);
}

int main() {
    int length = 9;
    int A[] = { 3, 7, 61, 3, 40, 4, -1, 8, 10 };

    merge_sort(A, length);

    for (int i = 0; i < length; ++i) {
        printf("%i, ", A[i]);
    }
    printf("\n");

    return 0;
}
chqrlie
  • 131,814
  • 10
  • 121
  • 189