-1

Trying to run a sorting C program which takes in input via the command line (Mac Terminal). If I manually input data the program works. If I input data from the command line (i.e. time ./hw2 mergesort < 10000.txt ) I get error:

hw2(1368,0x7fffcf79b3c0) malloc: * mach_vm_map(size=18446744065119617024) failed (error code=3) * error: can't allocate region *** set a breakpoint in malloc_error_break to debug ERROR: malloc failed for size: -2147483648 real 1m41.341s user 1m38.316s sys 0m2.406s

  #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>


    #define DEFAULT_SIZE    1024

int merge_sort(int arr[],int low,int high);
int merge(int arr[],int l,int m,int h);


    int* read_input(int* size)
// read input from stdin into array a; return size of array in size parameter

{
    int *a = NULL;
    int next = 0;
    int sz = DEFAULT_SIZE;

    a = malloc(sizeof(int) * sz);
    if (a == NULL)
    {
        fprintf(stderr, "ERROR: malloc failed for size: %d\n", sz);
        exit(1);
    }

    while (!feof (stdin))
    {
        int i = 0;
        if (scanf ("%d", &i) == EOF)
        {
            break;
        }

        a[next++] = i;

        // reached end of array--double size and allocate again;
        if (next == sz)
        {
            sz = 2 * sz;
            a = realloc(a, sizeof(int) * sz);
            if (a == NULL)
            {
                fprintf(stderr, "ERROR: malloc failed for size: %d\n", sz);
                exit(1);
            }
        }
    }

    *size = next;

    printf("READ %d elements into array\n", next);

    return a;
}


int merge_sort(int arr[],int low,int high)
{
    int mid;
    if(low<high)
    {
        mid=(low+high)/2;
        // Divide and Conquer
        merge_sort(arr,low,mid);
        merge_sort(arr,mid+1,high);
        // Combine
        merge(arr,low,mid,high);
    }

    return 0;
}

int merge(int arr[],int l,int m,int h)
{
    int arr1[10],arr2[10];  // Two temporary arrays to
    //  hold the two arrays to be merged
    int n1,n2,i,j,k;
    n1=m-l+1;
    n2=h-m;

    for(i=0;i<n1;i++)
    arr1[i]=arr[l+i];
    for(j=0;j<n2;j++)
    arr2[j]=arr[m+j+1];

    arr1[i]=9999;  // To mark the end of each temporary array
    arr2[j]=9999;

    i=0;j=0;
    for(k=l;k<=h;k++)  //process of combining two sorted arrays
    {
        if(arr1[i]<=arr2[j])
        arr[k]=arr1[i++];
        else
        arr[k]=arr2[j++];
    }

    return 0;
}


int do_merge_sort(int a[], int size)
{

    printf("BEGIN merge_sort...\n");

    merge_sort(a,0, size);

    printf("END merge_sort...\n");

    return 0;
}






    int do_heap_sort(int a[], int size)
// heapsort driver function

{

    printf("BEGIN heap_sort...\n");

    // TO BE FILLED IN

    printf("END heap_sort...\n");

    return 0;
}

// qiocksort driver function
    int do_quick_sort(int a[], int size)


    {

    printf("BEGIN quick_sort...\n");

    // TO BE FILLED IN

    printf("END quick_sort...\n");

    return 0;
}


    int usage()

{

    char *usage_str =
    "./hw2 [-h] mergesort|heapsort|quicksort\n"
    "\n"
    "Driver program to test different sort algorithn performance.\n"
    "\n"
    "Example\n"
    "\n"
    "./hw2 mergesort\n"
    "\n"
    "will test mergesrt\n"
    ;

    fprintf(stderr, "%s\n\n", usage_str);

    exit(1);
}


    int main(int argc, char *argv[])
    // driver function

{
    int *a = NULL;
    int size;
    int ret = 0;

    if (argc < 2)
    {
        fprintf(stderr, "ERROR: at least one argument needed\n");
        usage();
    }

    // read the input into array;
    a = read_input(&size);

    if (strcmp(argv[1], "mergesort") == 0) {
        do_merge_sort(a, size);
    }
    else if (strcmp(argv[1], "heapsort") == 0) {
        do_heap_sort(a, size);
    }
    else if (strcmp(argv[1], "quicksort") == 0) {
        do_quick_sort(a, size);
    }
    else {
        fprintf(stderr, "ERROR: BAD argument\n");
        usage();
    }

    // free allocated memory
    if (a) {
        free(a);
    }

    exit(0);
}

I use this python code to generate random number data:

#! usr/env/bin python
# to generate random data:
# python ./gen_data.py 1 1000000 > 1000000.dat
#
# you can verify that data by
# cat 1000000.dat | sort -g >1000000s.dat
# vi 1000000s.dat
#

import sys
import random

start_num = int(sys.argv[1])
end_num = int(sys.argv[2])

data = range(start_num, end_num)
random.shuffle(data)

for x in range(len(data)):
#    print(str(data[x]) + '\n')
    print(data[x])
Michał Turczyn
  • 32,028
  • 14
  • 47
  • 69
Ahmad Taj
  • 147
  • 1
  • 6

2 Answers2

0

It's possible that your loop’s end condition for no more input isn’t being met. That’s resulting in sz being doubled repeatedly until, as an Int, it loops back around to a negative value. Your kernel then complains that it can’t carry out a request to assign a block of memory that takes up negative space.

Russell Jurek
  • 84
  • 1
  • 1
  • 7
  • I guess you're on the right track here, but requesting a negative size isn't possible, `malloc()` takes a `size_t`. That said, using `int` for sizes might be part of the problem. Or the amount of data is just too much for the available RAM. –  Jul 21 '18 at 12:49
  • You’re correct that an implicit cast of a negative signed int to an unsigned int, will map to a very large value. It should be something exceeding ~2.1 billion for a 32-bit int. That’s about 2GB of memory at a minimum. Multiply that by 32 for the value of sizeof() and you end up with a gigantic memory request. With or without an implicit cast, the root cause is still the same. sz is being doubled too many times. – Russell Jurek Jul 21 '18 at 21:54
0

The malloc call is telling you that the given value (if it is cast as a signed type, because obviously, -1 gives a super large long number when it is red as unsigned) is negative, you should use a size_t when calling a function that request a size_t.

Consider using valgrind to debug your program and see where the "strange call" appears to be, we can't just help you by saying "It does not work please help".

Thus, I think the value that you give to malloc, kinda exceed it's type size, you are doing too much loops that result in too much sz = sz * 2. The value gets bigger and bigger, at some point, malloc is not able to request such amount of memory.