1

I was trying to implement merge sort in C.

But when I test the code I encounter this error c0000374 in my merge sort function when I try to split array into left right array.

enter image description here

The code is as follows.

typedef struct EntryStruct {
    int data;
    char *name;
} Entry;

typedef char *String;

void merge(Entry *output, Entry *L, int nL, Entry *R, int nR) {
    int i = 0;
    int j = 0;
    int k = 0;
    while (k < nL + nR) {
        if ((L[i].data != NULL && L[i].data < R[i].data) || R[j].data == NULL) {
            output[k] = L[i];
            i++;
        } else {
            output[k] = R[j];
            j++;
        }
        k++;
    }
}

void merge_sort(Entry *entries, int n) {
    if (n > 1) {
        int mid = n / 2;
        Entry *temp = (Entry *)malloc(n * sizeof(Entry));
        Entry *left = (Entry *)malloc(mid * sizeof(Entry));
        Entry *right = (Entry *)malloc((n - mid) * sizeof(Entry));

        for (int l = 0; l < mid; l++)
            left[l] = entries[l];

        for (int r = mid; r < n; r++)
            right[r] = entries[r];

        merge_sort(left, mid);
        merge_sort(right, n - mid);
        merge(temp, left, mid, right, n - mid);
        for (int i = 0 ; i < n; i++) {
            entries[i] = temp[i];
        }
        free(temp);
    }
}

Entry Entry_create(int data, String name) {
    Entry node;
    node.name = (String)malloc(strlen(name) + 1);
    strcpy(node.name, name);
    node.data = data;
    return node;
}

void printArrByName(Entry *arr, int s) {
    for (int i = 0; i < s; i++) {
        printf("%s\n", arr[i].name);
    }
}

int main(void) {
    Entry *arr = malloc(5 * sizeof(*arr));
    arr[0] = Entry_create(5, "abc");
    arr[1] = Entry_create(6, "def");
    arr[2] = Entry_create(2, "ghijk");
    arr[3] = Entry_create(3, "ksdljf");
    arr[4] = Entry_create(1, "lsdfjl");
    merge_sort(arr, 5);
    printArrByName(arr, 5);
    free(arr);
}

I want to ask what is the cause of this problem in my case and how to solve it.

Is this happen because I split array in to left right in the wrong way or is it something to do with the initialization of the array.

chqrlie
  • 131,814
  • 10
  • 121
  • 189
TaeXtreme
  • 135
  • 3
  • 11

2 Answers2

2

There are multiple problems in the code causing undefined behavior:

  • [major: undefined behavior] In the merge_sort function, the loop for (int r = mid; r < n; r++) right[r] = entries[r]; accesses the array pointed to by right beyond the end. You should write:

    for (int r = mid; r < n; r++)
        right[r - mid] = entries[r];
    

    This bug is a good candidate to explain the observed behavior as it corrupts the malloc() internal data, causing a subsequent call to malloc() to crash.

  • [major: memory leak] You do not free left, nor right. As a matter of fact, allocating copies of the left and right parts of the array is not even necessary.

  • [major: undefined behavior] In the merge function, you do not test if i is less than nL, nor of j is less than nR before accessing L[i] or R[j]. Testing if the data member is not NULL does not suffice, accessing an element beyond the end of an array has undefined behavior.

  • [minor: unstable sort] L[i].data < R[i].data might not preserve the order of entries that have the same data value. You should use L[i].data <= R[i].data to implement stable sorting.

  • [hint] Defining typedef char *String; is a bad idea. Do not hide pointers behind typedefs, it is confusing and error prone.

Here is a modified version:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct EntryStruct {
    int data;
    char *name;
} Entry;

#ifdef _MSC_VER
// define strdup on legacy systems
char *strdup(const char *s) {
    size_t len = strlen(s);
    char *p = (char *)malloc(len + 1);
    if (p)
        memcpy(p, s, len + 1);
    return p;
}
#endif

void merge(Entry *output, Entry *L, int nL, Entry *R, int nR) {
    int i = 0;
    int j = 0;
    int k = 0;
    while (k < nL + nR) {
        if (i < nL && (j >= nR || L[i].data <= R[j].data)) {
            output[k] = L[i];
            i++;
        } else {
            output[k] = R[j];
            j++;
        }
        k++;
    }
}

void merge_sort(Entry *entries, int n) {
    if (n > 1) {
        int mid = n / 2;
        Entry *temp;
        Entry *left = entries;
        Entry *right = entries + mid;

        merge_sort(left, mid);
        merge_sort(right, n - mid);
        temp = (Entry *)malloc(n * sizeof(Entry));
        merge(temp, left, mid, right, n - mid);
        for (int i = 0; i < n; i++) {
            entries[i] = temp[i];
        }
        free(temp);
    }
}

Entry Entry_create(int data, const char *name) {
    Entry node;
    node.name = strdup(name);
    node.data = data;
    return node;
}

void printArrByName(Entry *arr, int n) {
    for (int i = 0; i < n; i++) {
        printf("%s\n", arr[i].name);
    }
}

int main(void) {
    Entry *arr = malloc(5 * sizeof(*arr));
    arr[0] = Entry_create(5, "abc");
    arr[1] = Entry_create(6, "def");
    arr[2] = Entry_create(2, "ghijk");
    arr[3] = Entry_create(3, "ksdljf");
    arr[4] = Entry_create(1, "lsdfjl");
    merge_sort(arr, 5);
    printArrByName(arr, 5);
    for (int i = 0; i < 5; i++)
        free(arr[i].name);
    free(arr);
    return 0;
}
chqrlie
  • 131,814
  • 10
  • 121
  • 189
  • I'd categorize those as Major as well. Nice way to do it. – David C. Rankin Jun 12 '20 at 17:29
  • @DavidC.Rankin: agreed. I refined the categories. – chqrlie Jun 12 '20 at 17:31
  • That's even better. Here is the typedef pointers link I have [Is it a good idea to **typedef** pointers?](http://stackoverflow.com/questions/750178/is-it-a-good-idea-to-typedef-pointers) – David C. Rankin Jun 12 '20 at 18:45
  • When i test this version i got `Exception thrown at 0x7A89F338 (ucrtbased.dll) in MergeSort.exe: 0xC0000005: Access violation reading location 0x00000005.` from the if condition line in merge function and is there a missing `;` in `temp = (Entry *)malloc(n * sizeof(Entry))` line in your code – TaeXtreme Jun 13 '20 at 03:20
  • @TaeXtream: my bad, there were multiple problems in my code, I updated the code. The `merge_sort` function sorts the array by numeric order of the `data` field. – chqrlie Jun 13 '20 at 11:15
1

Although not needed for small arrays, and since there are answers based on the questions code, here is a somewhat optimized top down merge sort that avoids copy backs by using a pair of mutually recursive functions (...a2a, ...a2b). An entry function does a one time allocation of the temporary array. On my system, it takes less than .5 second to sort an array of 4 million structures.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct EntryStruct {
    int data;
    char *name;
} Entry;

/* prototypes for mutually recursive functions */
void merge_sort_a2a(Entry *a, Entry *b, int ll, int ee);
void merge_sort_a2b(Entry *a, Entry *b, int ll, int ee);

void merge(Entry *a, Entry *b, int ll, int rr, int ee)
{
int o = ll;                             /* b[]       index */
int l = ll;                             /* a[] left  index */
int r = rr;                             /* a[] right index */
    while(1){
        if(a[l].data <= a[r].data){     /* if a[l] <= a[r] */
            b[o++] = a[l++];            /*  copy a[l] */
            if(l < rr)                  /*   if not end of left run */
                continue;               /*    continue (back to while) */
            while(r < ee)               /*   else copy rest of right run */
                b[o++] = a[r++];
            break;                      /*    and return */
        } else {                        /* else a[l] > a[r] */
            b[o++] = a[r++];            /*  copy a[r] */
            if(r < ee)                  /*   if not end of right run */
                continue;               /*    continue (back to while) */
            while(l < rr)               /*   else copy rest of left run */
                b[o++] = a[l++];
            break;                      /*    and return */
        }
    }
}

void merge_sort_a2a(Entry *a, Entry *b, int ll, int ee)
{
int rr;
    if(ee - ll < 2){            /* if 1 element */
        return;                 /*  return */
    }
    rr = ll + (ee-ll)/2;        /* mid point, start of right run */
    merge_sort_a2b(a, b, ll, rr);
    merge_sort_a2b(a, b, rr, ee);
    merge(b, a, ll, rr, ee);
}

void merge_sort_a2b(Entry *a, Entry *b, int ll, int ee)
{
int rr;
    if(ee - ll < 2){            /* if 1 element */
        b[ll] = a[ll];          /*  copy to b */
        return;
    }
    rr = ll + (ee-ll)/2;        /* mid point, start of right run */
    merge_sort_a2a(a, b, ll, rr);
    merge_sort_a2a(a, b, rr, ee);
    merge(a, b, ll, rr, ee);
}

void merge_sort(Entry *a, int n) {
    if(n < 2)
        return;
    Entry *b = malloc(n * sizeof(Entry));
    merge_sort_a2a(a, b, 0, n);
    free(b);
}

Entry Entry_create(int data, const char *name) {
    Entry node;
    node.name = _strdup(name);      /* _strdup is ISO name */
    node.data = data;
    return node;
}

void printArrByName(Entry *arr, int n) {
    for (int i = 0; i < n; i++) {
        printf("%s\n", arr[i].name);
    }
}

int main(void) {
    Entry *arr = malloc(5 * sizeof(*arr));
    arr[0] = Entry_create(5, "abc");
    arr[1] = Entry_create(6, "def");
    arr[2] = Entry_create(2, "ghijk");
    arr[3] = Entry_create(3, "ksdljf");
    arr[4] = Entry_create(1, "lsdfjl");
    merge_sort(arr, 5);
    printArrByName(arr, 5);
    for (int i = 0; i < 5; i++)
        free(arr[i].name);
    free(arr);
    return 0;
}
rcgldr
  • 27,407
  • 3
  • 36
  • 61