1

I am trying to write a program to compile with Xeon Phi and it says there is a segmentation fault? I think it is when I try to fill the arrays with the getc function. I have written this code several different formats, and I understand that this might not be the most efficient, but I need to test it out to see if it will work by parallelizing it

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
//#include <omp.h>

int main()
{
    struct stat buf1;
    struct stat buf2;

    FILE *fp1, *fp2;
    int ch1, ch2;
    clock_t elapsed;
    char fname1[40], fname2[40];

    printf("Enter name of first file:");
    fgets(fname1, 40, stdin);
    while (fname1[strlen(fname1) - 1] == '\n')
    {
        fname1[strlen(fname1) -1] = '\0';
    }

    printf("Enter name of second file:");
    fgets(fname2, 40, stdin);
    while (fname2[strlen(fname2) - 1] == '\n')
    {
        fname2[strlen(fname2) -1] = '\0';
    }

    fp1 = fopen(fname1, "rb");
    if (fp1 == NULL)
    {
        printf("Cannot open %s for reading\n", fname1);
        exit(1);
    }

    fp2 = fopen(fname2, "rb");
    if (fp2 == NULL)
    {
        printf("Cannot open %s for reading\n", fname2);
        exit(1);
    }

    stat(fname1, &buf1);
    size_t size1 = buf1.st_size;

    stat(fname2, &buf2);
    size_t size2 = buf2.st_size;

    printf("Size of file 1: %zd\n", size1);
    printf("Size of file 2: %zd\n", size2);

    elapsed = clock(); // get starting time

    size_t smallest = 0;

    if(size1 < size2)
    {
        smallest = size1;
    }
    else
    {
        smallest = size2;
    }

    printf("Smallest Value: %zu\n", smallest);

    size_t i, j, k;
    size_t data[smallest];
    size_t arry1[smallest];
    size_t arry2[smallest];

    unsigned long long counter = 0;

    for(i = 0; i < smallest; i++)
    {
        data[i] = 1;
        arry1[i] = getc(fp1);
        arry2[i] = getc(fp2);
    }

    //#pragma omp for //reduction(+:counter)
    for(k = 0; k < smallest; k++)
    {
        if((arry1[k] ^ arry2[k]) == 0)
        {
            counter+= data[k];
        }
    }

    fclose (fp1); // close files
    fclose (fp2);

    float percent = (float)counter / (float)smallest * 100.0f;

    printf("Counter: %zu Total: %zu\n", counter, smallest);
    printf("Percentage: %.2f%\n", percent);

    elapsed = clock() - elapsed; // elapsed time
    printf("That took %.2f seconds.\n", (float)elapsed/CLOCKS_PER_SEC);
    return 0;
}

Thanks for your help in advance!

humblebeast
  • 303
  • 3
  • 16

2 Answers2

2

You cannot declare an array with a size that's not known at compile time:

int smallest;

smallest = .... // some computation

size_t data[smallest]; // this is wrong!

You should instead use malloc() to accomplish that:

size_t *data;

smallest = ... // whatever
data = malloc(smallest * sizeof(size_t));
ocho88
  • 125
  • 1
  • 8
  • 1
    Arrays can have runtime size since 1999 , however he might have a stack overflow – M.M Jul 24 '14 at 21:33
  • Shouldn't that last line be `data = (size_t*) malloc(...);`? (Note the `size_t*`) – Jashaszun Jul 24 '14 at 21:35
  • It should be neither of them, [see here](http://stackoverflow.com/questions/605845/do-i-cast-the-result-of-malloc) – M.M Jul 24 '14 at 22:12
  • Edited again. `malloc()` is type `void *` which is automatically cast, as @MattMcNabb suggests. – ocho88 Jul 25 '14 at 08:00
2

This loop:

while (fname1[strlen(fname1) - 1] == '\n')
    fname1[strlen(fname1) -1] = '\0';

will read off the start of the string if the line was blank (i.e. "\n"). Change while to if.

Also, check that smallest > 0 before declaring the VLAs.

It might be insightful to output the value of smallest, typical systems default to a stack size of somewhere between 1MB and 8MB, so perhaps you cause a stack overflow here. You could eliminate this possibility by using malloc, as ocho88 suggests (but without the bogus cast):

size_t *data = malloc(smallest * sizeof *data);
size_t *arry1 = malloc(smallest * sizeof *arry1);
size_t *arry2 = malloc(smallest * sizeof *arry2);

if ( !data || !arry1 || !arry2 )
     // exit with out-of-memory error

I'm not sure why you use a size_t to store the result of getc.

If this does not solve the problem then it would be useful to identify which line is segfaulting. If you can't get a debugger working, then you can output (to stderr, or to stdout with fflush) to find out where it is getting up to.

M.M
  • 138,810
  • 21
  • 208
  • 365
  • Interesting, I get the segmentation fault when I analyze data that is 10GB big, but it does not occur when I read in 6GB – humblebeast Jul 24 '14 at 22:08
  • So I guess it crashes? Is it possible to have it analyze a data set that large? – humblebeast Jul 24 '14 at 22:08
  • Use `malloc` to avoid the crash. – M.M Jul 24 '14 at 22:10
  • Obviously, at some point you will run into a limit of your system's memory, so you would need to redesign your program to use less memory. Based on the code you've shown so far, one plan would be to use `unsigned char` instead of `size_t` for `arry1`, `arry2` – M.M Jul 24 '14 at 22:11
  • You could also look into reading part of the file and analyzing it, and then reading more of the file and analyzing it, and combining the results. (3DES encryption and other such schemes work this way, they actually only need to use 8 bytes or 16 bytes of storage, the next encrypted block is built from the previous partial result, plus the next block of data) – M.M Jul 24 '14 at 22:17